├── .github └── workflows │ ├── collate.yml │ ├── deploy.yml │ └── test.yml ├── .gitignore ├── .yamllint.yaml ├── README.md ├── assets ├── 01ai.yaml ├── 360.yaml ├── adept.yaml ├── adobe.yaml ├── ai2.yaml ├── ai21.yaml ├── aleph_alpha.yaml ├── alibaba.yaml ├── amazon.yaml ├── anthropic.yaml ├── apple.yaml ├── argilla.yaml ├── argonne.yaml ├── aspia_space_institu.yaml ├── assembly.yaml ├── autogpt.yaml ├── avignon.yaml ├── baai.yaml ├── baichuan.yaml ├── baidu.yaml ├── bain.yaml ├── be_my_eyes.yaml ├── beitech.yaml ├── berkeley.yaml ├── bigcode.yaml ├── bigscience.yaml ├── bloomberg.yaml ├── boston.yaml ├── brex.yaml ├── bytedance.yaml ├── cagliostro.yaml ├── cartesia.yaml ├── casia.yaml ├── causallm.yaml ├── cerebras.yaml ├── character.yaml ├── chatglm.yaml ├── cmu.yaml ├── cognition.yaml ├── cognitive.yaml ├── cohere.yaml ├── columbia.yaml ├── compute_01_research.yaml ├── continue.yaml ├── cresta.yaml ├── databricks.yaml ├── deci.yaml ├── deepmind.yaml ├── deepnight.yaml ├── deepseek.yaml ├── dibt.yaml ├── duckduckgo.yaml ├── duolingo.yaml ├── eleutherai.yaml ├── ens.yaml ├── epfl.yaml ├── evolutionaryscale.yaml ├── faraday.yaml ├── fuse.yaml ├── genmo.yaml ├── glm.yaml ├── google.yaml ├── greenbit.yaml ├── h2oai.yaml ├── hubspot.yaml ├── huggingface.yaml ├── ibm.yaml ├── idea.yaml ├── ideogram.yaml ├── inflection.yaml ├── instacart.yaml ├── internlm.yaml ├── juni.yaml ├── kaist.yaml ├── kakaobrain.yaml ├── khan.yaml ├── konan.yaml ├── kotoba.yaml ├── ktai.yaml ├── kunlun.yaml ├── laion.yaml ├── laion_ev.yaml ├── latitude.yaml ├── lehigh.yaml ├── lg.yaml ├── lg_ai_research.yaml ├── linkedin.yaml ├── llm360.yaml ├── lmsys.yaml ├── mathai.yaml ├── maya.yaml ├── meta.yaml ├── microsoft.yaml ├── mila.yaml ├── mistral.yaml ├── moonhub.yaml ├── moreh.yaml ├── mosaic.yaml ├── nanyang.yaml ├── naver.yaml ├── ncsoft.yaml ├── neeva.yaml ├── nextdoor.yaml ├── nolano.yaml ├── notion.yaml ├── nous.yaml ├── nucleus.yaml ├── nvidia.yaml ├── oasst.yaml ├── ollama.yaml ├── openai.yaml ├── openbmb.yaml ├── openlemur.yaml ├── openx.yaml ├── orion.yaml ├── osu.yaml ├── othersideai.yaml ├── oxford.yaml ├── paladin.yaml ├── peking.yaml ├── perplexity.yaml ├── pleias.yaml ├── portkey.yaml ├── quizlet.yaml ├── quora.yaml ├── qwen_team.yaml ├── rakuten.yaml ├── reexpress.yaml ├── reka.yaml ├── robin.yaml ├── runway_ai,_inc..yaml ├── rwkv.yaml ├── salesforce.yaml ├── samba.yaml ├── sana.yaml ├── sciphi.yaml ├── shanghai.yaml ├── shop.yaml ├── singapore.yaml ├── skt.yaml ├── snap.yaml ├── soochow.yaml ├── speak.yaml ├── spotify.yaml ├── stability.yaml ├── stability_ai.yaml ├── stanford.yaml ├── stonybrook.yaml ├── suno.yaml ├── tiger.yaml ├── together.yaml ├── tokyo.yaml ├── toronto.yaml ├── transformify.yaml ├── trevor.yaml ├── triml.yaml ├── tsinghua.yaml ├── twelvelabs.yaml ├── uae.yaml ├── unknown.yaml ├── uw.yaml ├── uwashington.yaml ├── vago.yaml ├── viable.yaml ├── vilm.yaml ├── wayve.yaml ├── writer.yaml ├── xai.yaml ├── xverse.yaml ├── xwin.yaml ├── yandex.yaml ├── you.yaml └── zhejiang.yaml ├── components ├── home.html ├── nav.html └── table.html ├── css └── main.css ├── img ├── icon.svg └── info-icon.png ├── index.html ├── js ├── general.js ├── main.js └── schemas.yaml ├── pre-commit.sh ├── requirements.txt ├── resources ├── .gitkeep └── all_assets.csv ├── scripts └── collate.py ├── server.py └── yamlfix.toml /.github/workflows/collate.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: Collate 3 | 4 | "on": 5 | workflow_dispatch: 6 | push: 7 | paths: 8 
| - assets/** 9 | 10 | jobs: 11 | collate-assets: 12 | name: Collate all assets into single file 13 | runs-on: ubuntu-latest 14 | steps: 15 | - name: Checkout 16 | uses: actions/checkout@v3 17 | with: 18 | fetch-depth: 0 19 | 20 | - name: Setup Python 21 | uses: actions/setup-python@v5 22 | with: 23 | python-version: '3.9' 24 | 25 | - name: Install dependencies 26 | run: python -m pip install --upgrade pip pandas pyyaml 27 | 28 | - name: collate 29 | run: |- 30 | ./scripts/collate.py 31 | 32 | - name: commit 33 | run: |- 34 | git config user.name "GitHub Actions Bot" 35 | git config user.email "<>" 36 | git add resources/all_assets.csv 37 | git commit -m "Update collated assets CSV." 38 | git push origin main 39 | -------------------------------------------------------------------------------- /.github/workflows/deploy.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: Deploy 3 | 4 | "on": 5 | push: 6 | branches: main 7 | 8 | jobs: 9 | # Check out the main branch of the development repository and force update the 10 | # main of the website repository. Setting fetch-depth to 0 allows us to 11 | # retrieve all the history for the specified branch. 12 | update-website: 13 | name: Update Website 14 | runs-on: ubuntu-latest 15 | steps: 16 | - name: Checkout 17 | uses: actions/checkout@v3 18 | with: 19 | fetch-depth: 0 20 | 21 | - name: Setup SSH 22 | uses: MrSquaare/ssh-setup-action@v1 23 | with: 24 | host: github.com 25 | private-key: ${{ secrets.WEBSITE_DEPLOY_SSH_PRIVATE_KEY }} 26 | 27 | - name: Update the github-pages branch of the website repository 28 | env: 29 | COMMIT_AUTHOR: Stanford-CRFM-Website 30 | run: | 31 | # Set up Git configuration 32 | git config --global user.name "${{ env.COMMIT_AUTHOR }}" 33 | git config --global user.email "username@users.noreply.github.com" 34 | 35 | # Update the website repository 36 | git remote add website git+ssh://git@github.com/stanford-crfm-website/ecosystem-graphs.git 37 | git push -u website main 38 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: Test 3 | 4 | "on": 5 | push: 6 | branches: main 7 | pull_request: 8 | branches: 9 | - main 10 | 11 | jobs: 12 | format: 13 | name: Format YAML files 14 | runs-on: ubuntu-latest 15 | steps: 16 | - name: Checkout 17 | uses: actions/checkout@v3 18 | 19 | - name: Set up Python 3.8 20 | uses: actions/setup-python@v1 21 | with: 22 | python-version: 3.8 23 | 24 | - name: Validate format 25 | run: sh ./pre-commit.sh && git diff --exit-code 26 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Development Environment 2 | .DS_Store 3 | .vscode 4 | 5 | # Group specific 6 | nav 7 | tags -------------------------------------------------------------------------------- /.yamllint.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | yaml-files: 4 | - '*.yaml' 5 | - '*.yml' 6 | - '.yamllint' 7 | 8 | rules: 9 | braces: enable 10 | brackets: enable 11 | colons: enable 12 | commas: enable 13 | comments: 14 | level: warning 15 | comments-indentation: 16 | level: warning 17 | document-end: disable 18 | document-start: 19 | level: warning 20 | empty-lines: enable 21 | empty-values: disable 22 | hyphens: enable 23 | indentation: 24 | 
level: warning 25 | key-duplicates: enable 26 | key-ordering: disable 27 | line-length: disable 28 | new-line-at-end-of-file: enable 29 | new-lines: enable 30 | octal-values: disable 31 | quoted-strings: disable 32 | trailing-spaces: enable 33 | truthy: 34 | level: warning 35 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Ecosystem Graphs 2 | 3 | ## Overview 4 | Welcome! Ecosystem Graphs is an ongoing effort to track the foundation model ecosystem, namely both the assets (datasets, models, and applications) and their relationships. Using it, one can answer questions such as: What are the latest foundation models? Who builds them and where are they used downstream? What are the general trends over time? We hope that ecosystem graphs will be a useful resource for researchers, application developers, policymakers, and the public to better understand the foundation models ecosystem. 5 | 6 | To explore the ecosystem, check out the [website](https://crfm.stanford.edu/ecosystem-graphs/) or read the [paper](https://arxiv.org/abs/2303.15772). 7 | 8 | Briefly, an ecosystem graph is defined by: 9 | - **Assets.** These are the nodes in the graph, which can be datasets (e.g. The Pile), models (e.g. Stable Diffusion), or applications (e.g. Microsoft Word). 10 | - **Dependencies.** These are the edges in the graph, which indicate how assets are built (e.g. the BLOOM model is trained on the ROOTS dataset). 11 | - **Ecosystem cards.** These are structured cards that house metadata on each asset (e.g. who built it, when, what is the license). 12 | 13 | ## Contribute 14 | We actively encourage community contributions. To contribute: 15 | - Add assets by filling out [this form](https://forms.gle/VqnSsZhv62hJ5rP36). No coding expertise required! 16 | - Submit a PR (run `pre-commit.sh` before submitting) 17 | 18 | To visualize and explore your changes, start a local server: 19 | 20 | python server.py 21 | 22 | and navigate to [http://localhost:8000](http://localhost:8000). 23 | 24 | ## Cite as 25 | 26 | ``` 27 | @article{bommasani2023ecosystem-graphs, 28 | author = {Bommasani, Rishi and 29 | Soylu, Dilara and 30 | Liao, Thomas I. and 31 | Creel, Kathleen A. and 32 | Liang, Percy}, 33 | title = {Ecosystem Graphs: The Social Footprint of Foundation Models}, 34 | month = mar, 35 | year = 2023, 36 | url = {https://arxiv.org/abs/2303.15772} 37 | } 38 | ``` 39 | -------------------------------------------------------------------------------- /assets/01ai.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: Evaluated on standard language benchmarks, common sense reasoning, and 3 | reading comprehension in comparison to SoTA LLMs. 4 | created_date: 2023-11-02 5 | dependencies: [] 6 | description: The Yi series models are large language models trained from scratch 7 | by developers at 01 AI. 8 | feedback: https://huggingface.co/01-ai/Yi-34B/discussions 9 | intended_uses: '' 10 | license: 11 | explanation: Model license can be found at https://huggingface.co/01-ai/Yi-34B/blob/main/LICENSE.
12 | Code license is under Apache 2.0 13 | value: custom 14 | modality: text; text 15 | model_card: https://huggingface.co/01-ai/Yi-34B 16 | monitoring: unknown 17 | name: Yi 18 | nationality: China 19 | organization: 01 AI 20 | prohibited_uses: none 21 | quality_control: Model underwent supervised fine-tuning, leading to a greater diversity 22 | of responses. 23 | size: 34B parameters (dense) 24 | training_emissions: unknown 25 | training_hardware: unknown 26 | training_time: unknown 27 | type: model 28 | url: https://github.com/01-ai/Yi 29 | - access: open 30 | analysis: Yi-VL outperforms all existing open-source models in MMMU and CMMMU, two 31 | advanced benchmarks that include massive multi-discipline multimodal questions 32 | (based on data available up to January 2024). 33 | created_date: 2024-01-23 34 | dependencies: [] 35 | description: The Yi Vision Language (Yi-VL) model is the open-source, multimodal 36 | version of the Yi Large Language Model (LLM) series, enabling content comprehension, 37 | recognition, and multi-round conversations about images. 38 | feedback: https://huggingface.co/01-ai/Yi-VL-34B/discussions 39 | intended_uses: '' 40 | license: 41 | explanation: Model license can be found at https://huggingface.co/01-ai/Yi-VL-34B/blob/main/LICENSE. 42 | Code license is under Apache 2.0 43 | value: custom 44 | modality: text; text 45 | model_card: https://huggingface.co/01-ai/Yi-VL-34B 46 | monitoring: unknown 47 | name: Yi-VL 48 | nationality: China 49 | organization: 01 AI 50 | prohibited_uses: '' 51 | quality_control: unknown 52 | size: 34B parameters (dense) 53 | training_emissions: unknown 54 | training_hardware: 128 NVIDIA A800 (80G) GPUs 55 | training_time: 10 days 56 | type: model 57 | url: https://github.com/01-ai/Yi 58 | -------------------------------------------------------------------------------- /assets/360.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: Achieved competitive performance on relevant benchmarks against other 3 | 7B models in Chinese, English, and coding tasks. 4 | created_date: 2024-05-23 5 | dependencies: [] 6 | description: 360 Zhinao is a multilingual LLM in Chinese and English with chat capabilities. 7 | feedback: none 8 | intended_uses: '' 9 | license: unknown 10 | modality: text; text 11 | model_card: none 12 | monitoring: '' 13 | name: 360 Zhinao 14 | nationality: China 15 | organization: 360 Security 16 | prohibited_uses: '' 17 | quality_control: '' 18 | size: 7B parameters 19 | training_emissions: unknown 20 | training_hardware: unknown 21 | training_time: unknown 22 | type: model 23 | url: https://arxiv.org/pdf/2405.13386 24 | -------------------------------------------------------------------------------- /assets/adept.yaml: -------------------------------------------------------------------------------- 1 | - access: closed 2 | analysis: '' 3 | created_date: 4 | explanation: The date the model was announced in the [[Adept blog post]](https://www.adept.ai/blog/act-1). 5 | value: 2022-09-14 6 | dependencies: [] 7 | description: ACT-1 (ACtion Transformer) is a large-scale transformer model designed 8 | and trained specifically for taking actions on computers (use software tools, APIs, 9 | and websites) in response to the user's natural language commands.
10 | feedback: '' 11 | intended_uses: '' 12 | license: unknown 13 | modality: text; text 14 | model_card: none 15 | monitoring: '' 16 | name: ACT-1 17 | nationality: USA 18 | organization: Adept 19 | prohibited_uses: '' 20 | quality_control: '' 21 | size: '' 22 | training_emissions: unknown 23 | training_hardware: unknown 24 | training_time: unknown 25 | type: model 26 | url: https://www.adept.ai/blog/act-1 27 | - access: open 28 | analysis: Evaluated in comparison to LLaMA 2 and MPT Instruct, and outperforms both 29 | on standard benchmarks. 30 | created_date: 2023-09-07 31 | dependencies: [] 32 | description: Persimmon is the most capable open-source, fully permissive model with 33 | fewer than 10 billion parameters, as of its release date. 34 | feedback: '' 35 | intended_uses: '' 36 | license: Apache 2.0 37 | modality: text; text 38 | model_card: '' 39 | monitoring: '' 40 | name: Persimmon 41 | nationality: USA 42 | organization: Adept 43 | prohibited_uses: '' 44 | quality_control: '' 45 | size: 8B parameters (dense) 46 | training_emissions: '' 47 | training_hardware: '' 48 | training_time: '' 49 | type: model 50 | url: https://www.adept.ai/blog/persimmon-8b 51 | - access: open 52 | analysis: Evaluated on standard image understanding benchmarks. 53 | created_date: 2023-10-17 54 | dependencies: [] 55 | description: Fuyu is a small version of the multimodal model that powers Adept's 56 | core product. 57 | feedback: https://huggingface.co/adept/fuyu-8b/discussions 58 | intended_uses: The model is intended for research purposes only. 59 | license: CC-BY-NC-4.0 60 | modality: image, text; text 61 | model_card: https://huggingface.co/adept/fuyu-8b 62 | monitoring: '' 63 | name: Fuyu 64 | nationality: USA 65 | organization: Adept 66 | prohibited_uses: The model was not trained to be factual or true representations 67 | of people or events, and therefore using the model to generate such content is 68 | out-of-scope for the abilities of this model. 69 | quality_control: none 70 | size: 8B parameters (dense) 71 | training_emissions: unknown 72 | training_hardware: unknown 73 | training_time: unknown 74 | type: model 75 | url: https://www.adept.ai/blog/fuyu-8b 76 | - access: closed 77 | analysis: Evaluated on the MMLU, GSM8K, MATH, and HumanEval benchmarks. According 78 | to these benchmarks, Fuyu-Heavy is, as of release, the strongest multimodal model 79 | trained outside of Google or OpenAI. 80 | created_date: 2024-01-24 81 | dependencies: [] 82 | description: Fuyu Heavy is a new multimodal model designed specifically for digital 83 | agents. 84 | feedback: none 85 | intended_uses: unknown 86 | license: unknown 87 | modality: image, text; text 88 | model_card: none 89 | monitoring: '' 90 | name: Fuyu Heavy 91 | nationality: USA 92 | organization: Adept 93 | prohibited_uses: none 94 | quality_control: none 95 | size: 96 | explanation: The size of the model is 10-20 times smaller than GPT-4V and Gemini 97 | Ultra, as per announcement. 98 | value: unknown 99 | training_emissions: unknown 100 | training_hardware: unknown 101 | training_time: unknown 102 | type: model 103 | url: https://www.adept.ai/blog/adept-fuyu-heavy 104 | -------------------------------------------------------------------------------- /assets/apple.yaml: -------------------------------------------------------------------------------- 1 | - access: closed 2 | analysis: Evaluated on image captioning and visual question answering across many 3 | benchmarks. 
4 | created_date: 2024-03-16 5 | dependencies: [] 6 | description: MM1 is a family of multimodal models, including both dense variants 7 | up to 30B and mixture-of-experts (MoE) variants up to 64B. 8 | feedback: none 9 | intended_uses: '' 10 | license: unknown 11 | modality: image, text; text 12 | model_card: none 13 | monitoring: '' 14 | name: MM1 15 | nationality: USA 16 | organization: Apple 17 | prohibited_uses: '' 18 | quality_control: '' 19 | size: 30B parameters (dense) 20 | training_emissions: unknown 21 | training_hardware: unknown 22 | training_time: unknown 23 | type: model 24 | url: https://arxiv.org/pdf/2403.09611.pdf 25 | - access: open 26 | analysis: The models were evaluated in terms of zero-shot, LLM360, and OpenLLM leaderboard 27 | results. 28 | created_date: 2024-04-24 29 | dependencies: 30 | - RefinedWeb 31 | - The Pile 32 | - RedPajama-Data 33 | - Dolma 34 | - CoreNet library 35 | description: OpenELM is a family of Open-source Efficient Language Models. It uses 36 | a layer-wise scaling strategy to efficiently allocate parameters within each layer 37 | of the transformer model, leading to enhanced accuracy. 38 | feedback: https://huggingface.co/apple/OpenELM-3B-Instruct/discussions 39 | intended_uses: To empower and enrich the open research community by providing access 40 | to state-of-the-art language models. 41 | license: Apple 42 | modality: text; text 43 | model_card: https://huggingface.co/apple/OpenELM-3B-Instruct 44 | monitoring: none 45 | name: OpenELM 46 | nationality: USA 47 | organization: Apple 48 | prohibited_uses: No explicit prohibited uses stated, though it is noted that users 49 | should undertake thorough safety testing. 50 | quality_control: unknown 51 | size: 3B parameters 52 | training_emissions: unknown 53 | training_hardware: unknown 54 | training_time: unknown 55 | type: model 56 | url: https://machinelearning.apple.com/research/openelm 57 | - access: 58 | explanation: We release code & weights at https://github.com/apple/ml-depth-pro 59 | value: open 60 | analysis: Extensive experiments analyze specific design choices and demonstrate 61 | that Depth Pro outperforms prior work along multiple dimensions. 62 | created_date: 2024-10-10 63 | dependencies: [] 64 | description: We present a foundation model for zero-shot metric monocular depth 65 | estimation. Our model, Depth Pro, synthesizes high-resolution depth maps with 66 | unparalleled sharpness and high-frequency details... The model is fast, producing 67 | a 2.25-megapixel depth map in 0.3 seconds on a standard GPU. 68 | feedback: unknown 69 | intended_uses: Zero-shot monocular depth estimation underpins a growing variety 70 | of applications, such as advanced image editing, view synthesis, and conditional 71 | image generation. 72 | license: unknown 73 | modality: 74 | explanation: We present a foundation model for zero-shot metric monocular depth 75 | estimation. 76 | value: text; depth maps 77 | model_card: unknown 78 | monitoring: unknown 79 | name: Depth Pro 80 | nationality: USA 81 | organization: Apple 82 | prohibited_uses: unknown 83 | quality_control: dedicated evaluation metrics for boundary accuracy in estimated 84 | depth maps, and state-of-the-art focal length estimation from a single image. 85 | size: unknown 86 | training_emissions: unknown 87 | training_hardware: 88 | explanation: "2.25-megapixel depth maps with a native output resolution of 1536\ 89 | \ \xD7 1536 in 0.3 seconds on a V100 GPU." 
90 | value: V100 GPU 91 | training_time: unknown 92 | type: model 93 | url: https://arxiv.org/pdf/2410.02073 94 | -------------------------------------------------------------------------------- /assets/argilla.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: Evaluated on MT-Bench and AlphaEval benchmarks. 3 | created_date: 2023-12-01 4 | dependencies: 5 | - UltraFeedback 6 | - Zephyr 7 | description: Notus is an open source LLM, fine-tuned using Direct Preference Optimization 8 | (DPO) and AIF (AI Feedback) techniques. 9 | feedback: https://huggingface.co/argilla/notus-7b-v1/discussions 10 | intended_uses: Academic research and free commercial usage 11 | license: MIT 12 | modality: text; text 13 | model_card: https://huggingface.co/argilla/notus-7b-v1 14 | monitoring: none 15 | name: Notus 16 | nationality: Belgium 17 | organization: Argilla 18 | prohibited_uses: '' 19 | quality_control: '' 20 | size: 7B parameters (dense) 21 | training_emissions: unknown 22 | training_hardware: 8 x A100 40GB GPUs 23 | training_time: unknown 24 | type: model 25 | url: https://argilla.io/blog/notus7b/ 26 | -------------------------------------------------------------------------------- /assets/argonne.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: '' 3 | created_date: 2022-10-11 4 | dependencies: 5 | - SARS-CoV-2 genome dataset 6 | - BV-BRC dataset 7 | description: '' 8 | feedback: '' 9 | intended_uses: '' 10 | license: MIT 11 | modality: text; genome sequence 12 | model_card: '' 13 | monitoring: '' 14 | name: GenSLM 15 | nationality: USA 16 | organization: Argonne National Laboratory 17 | prohibited_uses: '' 18 | quality_control: '' 19 | size: 25B parameters (dense) 20 | training_emissions: '' 21 | training_hardware: '' 22 | training_time: '' 23 | type: model 24 | url: https://www.biorxiv.org/content/10.1101/2022.10.10.511571v1 25 | -------------------------------------------------------------------------------- /assets/aspia_space_institu.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: "The models\u2019 performance on downstream tasks was evaluated by linear\ 3 | \ probing. The models follow a similar saturating log-log scaling law to textual\ 4 | \ models, their performance improves with the increase in model size up to the\ 5 | \ saturation point of parameters." 6 | created_date: 2024-09-08 7 | dependencies: 8 | - DESI Legacy Survey DR8 9 | description: AstroPT is an autoregressive pretrained transformer developed with 10 | astronomical use-cases in mind. The models have been pretrained on 8.6 million 11 | 512x512 pixel grz-band galaxy postage stamp observations from the DESI Legacy 12 | Survey DR8. They have created a range of models with varying complexity, ranging 13 | from 1 million to 2.1 billion parameters. 14 | feedback: Any problem with the model can be reported to Michael J. Smith at mike@mjjsmith.com. 15 | intended_uses: The models are intended for astronomical use-cases, particularly 16 | in handling and interpreting large observation data from astronomical sources. 
17 | license: MIT 18 | modality: image; image 19 | model_card: unknown 20 | monitoring: Unknown 21 | name: AstroPT 22 | nationality: unknown 23 | organization: "Aspia Space, Instituto de Astrof\xEDsica de Canarias (IAC), UniverseTBD,\ 24 | \ Astrophysics Research Institute, Liverpool John Moores University, Departamento\ 25 | \ Astrof\xEDsica, Universidad de la Laguna, Observatoire de Paris, LERMA, PSL\ 26 | \ University, and Universit\xE9 Paris-Cit\xE9." 27 | prohibited_uses: Unknown 28 | quality_control: "The models\u2019 performances were evaluated on downstream tasks\ 29 | \ as measured by linear probing." 30 | size: 2.1B parameters 31 | training_emissions: Unknown 32 | training_hardware: Unknown 33 | training_time: Unknown 34 | type: model 35 | url: https://arxiv.org/pdf/2405.14930v1 36 | -------------------------------------------------------------------------------- /assets/autogpt.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | adaptation: GPT-4 adapted to run autonomously by chaining together LLM "thoughts" 3 | created_date: 2023-04-16 4 | dependencies: 5 | - GPT-4 API 6 | description: Auto-GPT is an experimental open-source application showcasing the 7 | capabilities of the GPT-4 language model. 8 | failures: '' 9 | feedback: '' 10 | intended_uses: '' 11 | license: MIT 12 | monitoring: '' 13 | monthly_active_users: '' 14 | name: Auto-GPT 15 | nationality: USA 16 | organization: Auto-GPT 17 | output_space: text 18 | prohibited_uses: '' 19 | quality_control: '' 20 | terms_of_service: '' 21 | type: application 22 | url: https://news.agpt.co/ 23 | user_distribution: '' 24 | -------------------------------------------------------------------------------- /assets/avignon.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: BioMistral was evaluated on a benchmark comprising 10 established medical 3 | question-answering (QA) tasks in English and seven other languages. 4 | created_date: 2024-02-15 5 | dependencies: 6 | - Mistral 7 | - PubMed Central 8 | description: BioMistral is an open-source Large Language Model tailored for the 9 | biomedical domain, utilizing Mistral as its foundation model and further pre-trained 10 | on PubMed Central. 11 | feedback: https://huggingface.co/BioMistral/BioMistral-7B/discussions 12 | intended_uses: Research in the biomedical domain, especially for medical question-answering 13 | tasks. 14 | license: Apache 2.0 15 | modality: text; text 16 | model_card: https://huggingface.co/BioMistral/BioMistral-7B 17 | monitoring: '' 18 | name: BioMistral 19 | nationality: unknown 20 | organization: Avignon University, Nantes University 21 | prohibited_uses: Prohibited from deploying in production environments for natural 22 | language generation or any professional health and medical purposes. 23 | quality_control: '' 24 | size: 7B parameters (dense) 25 | training_emissions: unknown 26 | training_hardware: 32 NVIDIA A100 80GB GPUs 27 | training_time: unknown 28 | type: model 29 | url: https://arxiv.org/pdf/2402.10373.pdf 30 | -------------------------------------------------------------------------------- /assets/baichuan.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: Evaluated on public benchmarks like MMLU, CMMLU, GSM8K, and HumanEval.
3 | created_date: 2023-09-20 4 | dependencies: [] 5 | description: Baichuan 2 is a series of large-scale multilingual language models 6 | containing 7 billion and 13 billion parameters, trained from scratch, on 2.6 trillion 7 | tokens. 8 | feedback: https://huggingface.co/Xwin-LM/Xwin-LM-70B-V0.1/discussions 9 | intended_uses: '' 10 | license: Apache 2.0 11 | modality: text; text 12 | model_card: none 13 | monitoring: none 14 | name: Baichuan 2 15 | nationality: China 16 | organization: Baichuan Inc. 17 | prohibited_uses: '' 18 | quality_control: '' 19 | size: 13B parameters (dense) 20 | training_emissions: unknown 21 | training_hardware: 1024 NVIDIA A800 GPUs 22 | training_time: unknown 23 | type: model 24 | url: https://arxiv.org/pdf/2309.10305.pdf 25 | -------------------------------------------------------------------------------- /assets/bain.yaml: -------------------------------------------------------------------------------- 1 | - access: limited 2 | adaptation: '' 3 | created_date: 2023-02-21 4 | dependencies: 5 | - ChatGPT API 6 | description: "With the alliance, Bain will combine its deep digital implementation\ 7 | \ capabilities and strategic expertise with OpenAI\u2019s AI tools and platforms,\ 8 | \ including ChatGPT, to help its clients around the world identify and implement\ 9 | \ the value of AI to maximize business potential." 10 | failures: '' 11 | feedback: '' 12 | intended_uses: '' 13 | license: unknown 14 | monitoring: '' 15 | monthly_active_users: '' 16 | name: Bain Chat 17 | nationality: USA 18 | organization: Bain 19 | output_space: '' 20 | prohibited_uses: '' 21 | quality_control: '' 22 | terms_of_service: '' 23 | type: application 24 | url: https://www.bain.com/vector-digital/partnerships-alliance-ecosystem/openai-alliance/ 25 | user_distribution: '' 26 | -------------------------------------------------------------------------------- /assets/be_my_eyes.yaml: -------------------------------------------------------------------------------- 1 | - access: limited 2 | adaptation: '' 3 | created_date: 2023-03-14 4 | dependencies: 5 | - GPT-4 API 6 | description: "The first-ever digital visual assistant powered by OpenAI\u2019s new\ 7 | \ GPT-4 language model." 8 | failures: '' 9 | feedback: '' 10 | intended_uses: '' 11 | license: unknown 12 | monitoring: '' 13 | monthly_active_users: '' 14 | name: Virtual Volunteer 15 | nationality: Denmark 16 | organization: Be My Eyes 17 | output_space: '' 18 | prohibited_uses: '' 19 | quality_control: '' 20 | terms_of_service: '' 21 | type: application 22 | url: https://www.bemyeyes.com/blog/introducing-be-my-eyes-virtual-volunteer 23 | user_distribution: '' 24 | -------------------------------------------------------------------------------- /assets/beitech.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: Models fine-tuned on EXMODD and earlier dataset Image-Chat and then evaluated 3 | on Image-Chat validation set. 4 | created_date: 2023-10-17 5 | datasheet: none 6 | dependencies: 7 | - YFCC100M 8 | - Image-Chat 9 | description: EXMODD (Explanatory Multimodal Open-Domain Dialogue dataset) is a dataset 10 | built off the proposed MDCF (Multimodal Data Construction Framework). 
11 | excluded: '' 12 | feedback: Feedback can be sent to authors via poplpr@bit.edu.cn 13 | included: '' 14 | intended_uses: '' 15 | license: MIT 16 | modality: image, text 17 | monitoring: none 18 | name: EXMODD 19 | nationality: China 20 | organization: Beijing Institute of Technology 21 | prohibited_uses: '' 22 | quality_control: '' 23 | sample: [] 24 | size: unknown 25 | type: dataset 26 | url: https://arxiv.org/pdf/2310.10967.pdf 27 | - access: open 28 | analysis: Evaluated on standard benchmarks including MMLU, CEval, and DROP. 29 | created_date: 2023-11-13 30 | dependencies: 31 | - Llama 2 32 | description: MiniMA is a smaller finetuned Llama 2 model adapted for Chinese. 33 | feedback: https://huggingface.co/GeneZC/MiniMA-3B/discussions 34 | intended_uses: '' 35 | license: Llama 2 36 | modality: text; text 37 | model_card: https://huggingface.co/GeneZC/MiniMA-3B 38 | monitoring: unknown 39 | name: MiniMA 40 | nationality: China 41 | organization: Beijing Institute of Technology 42 | prohibited_uses: '' 43 | quality_control: '' 44 | size: 3B parameters (dense) 45 | training_emissions: unknown 46 | training_hardware: 8 A100 80G GPUs 47 | training_time: unknown 48 | type: model 49 | url: https://github.com/GeneZC/MiniMA 50 | -------------------------------------------------------------------------------- /assets/berkeley.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: Evaluated in comparison with ChatGPT and Stanford Alpaca. 3 | created_date: 2023-04-03 4 | dependencies: 5 | - LLaMA 6 | - web-scraped dialogue data 7 | description: "A relatively small chatbot trained by fine-tuning Meta\u2019s LLaMA\ 8 | \ on dialogue data gathered from the web." 9 | feedback: https://huggingface.co/TheBloke/koala-7B-GPTQ-4bit-128g/discussions 10 | intended_uses: academic research 11 | license: Apache 2.0 12 | modality: text; text 13 | model_card: https://huggingface.co/TheBloke/koala-7B-GPTQ-4bit-128g 14 | monitoring: '' 15 | name: Koala 16 | nationality: USA 17 | organization: Berkeley 18 | prohibited_uses: '' 19 | quality_control: '' 20 | size: 13B parameters (dense) 21 | training_emissions: '' 22 | training_hardware: 8 A100 GPUs 23 | training_time: 6 hours 24 | type: model 25 | url: https://bair.berkeley.edu/blog/2023/04/03/koala/ 26 | - access: open 27 | analysis: Evaluated using AST sub-tree matching technique and compared to other 28 | models in terms of API functionality accuracy. 29 | created_date: 2023-05-24 30 | dependencies: 31 | - LLaMA 32 | - Gorilla document retriever 33 | description: Gorilla is a finetuned LLaMA-based model that surpasses the performance 34 | of GPT-4 on writing API calls. 35 | feedback: '' 36 | intended_uses: In conjunction with a LLM to improve its capability for using API 37 | calls. 38 | license: Apache 2.0 39 | modality: text; API 40 | model_card: '' 41 | monitoring: '' 42 | name: Gorilla 43 | nationality: USA 44 | organization: Berkeley 45 | prohibited_uses: '' 46 | quality_control: No specific quality control is mentioned in model training, though 47 | details on data processing and collection are provided in the paper. 48 | size: 7B parameters (dense) 49 | training_emissions: '' 50 | training_hardware: '' 51 | training_time: '' 52 | type: model 53 | url: https://arxiv.org/pdf/2305.15334v1.pdf 54 | - access: open 55 | analysis: Evaluated on wide range of tasks using own evaluation benchmarks.
56 | created_date: 2023-05-03 57 | dependencies: 58 | - RedPajama 59 | description: OpenLlama is an open source reproduction of Meta's LLaMA model. 60 | feedback: '' 61 | intended_uses: '' 62 | license: Apache 2.0 63 | modality: text; text 64 | model_card: '' 65 | monitoring: '' 66 | name: OpenLLaMA 67 | nationality: USA 68 | organization: Berkeley 69 | prohibited_uses: '' 70 | quality_control: '' 71 | size: 17B parameters (dense) 72 | training_emissions: unknown 73 | training_hardware: '' 74 | training_time: unknown 75 | type: model 76 | url: https://github.com/openlm-research/open_llama 77 | -------------------------------------------------------------------------------- /assets/boston.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: "Platypus achieves the strongest performance and currently stands at first\ 3 | \ place in HuggingFace\u2019s Open LLM Leaderboard as of its release date." 4 | created_date: 2023-08-14 5 | dependencies: 6 | - LLaMA 2 7 | - Platypus curated dataset 8 | description: Platypus is a family of fine-tuned and merged Large Language Models 9 | (LLMs). 10 | feedback: https://huggingface.co/garage-bAInd/Platypus2-13B/discussions 11 | intended_uses: '' 12 | license: CC by-NC-SA 4.0 13 | modality: text; text 14 | model_card: https://huggingface.co/garage-bAInd/Platypus2-13B 15 | monitoring: '' 16 | name: Platypus 17 | nationality: USA 18 | organization: Boston University 19 | prohibited_uses: '' 20 | quality_control: '' 21 | size: 13B parameters (dense) 22 | training_emissions: '' 23 | training_hardware: 1 A100 GPU 24 | training_time: 5 hours 25 | type: model 26 | url: https://arxiv.org/pdf/2308.07317.pdf 27 | - access: open 28 | analysis: UFOGen is evaluated on standard image benchmarks against other models 29 | fine-tuned with Stable Diffusion. 30 | created_date: 2023-11-14 31 | dependencies: 32 | - Stable Diffusion 33 | description: UFOGen is a novel generative model designed for ultra-fast, one-step 34 | text-to-image synthesis. 35 | feedback: none 36 | intended_uses: '' 37 | license: unknown 38 | modality: text; image 39 | model_card: none 40 | monitoring: '' 41 | name: UFOGen 42 | nationality: USA 43 | organization: Boston University 44 | prohibited_uses: '' 45 | quality_control: '' 46 | size: 900M parameters (dense) 47 | training_emissions: unknown 48 | training_hardware: unknown 49 | training_time: unknown 50 | type: model 51 | url: https://arxiv.org/pdf/2311.09257.pdf 52 | -------------------------------------------------------------------------------- /assets/brex.yaml: -------------------------------------------------------------------------------- 1 | - access: limited 2 | adaptation: '' 3 | created_date: 2023-03-07 4 | dependencies: 5 | - ChatGPT API 6 | description: Brex Inc., a highly valued startup that makes software for finance 7 | professionals, is turning to the same artificial intelligence tool behind ChatGPT 8 | for a service that can answer questions about corporate budgets, policy and spending. 9 | failures: '' 10 | feedback: '' 11 | intended_uses: '' 12 | license: 13 | explanation: '"Brex grants you a nonexclusive and nontransferable license to use 14 | Brex Property as provided through the Services and as permitted by these User 15 | Terms." - excerpt from the Terms of Service. 
16 | 17 | ' 18 | value: custom 19 | monitoring: '' 20 | monthly_active_users: '' 21 | name: Brex Chat 22 | nationality: USA 23 | organization: Brex 24 | output_space: '' 25 | prohibited_uses: '' 26 | quality_control: '' 27 | terms_of_service: https://www.brex.com/legal/user-terms 28 | type: application 29 | url: https://fortune.com/2023/03/07/cfo-chatbot-chatgpt-ai-brex-finance-software-startup-budgets-policies/ 30 | user_distribution: '' 31 | -------------------------------------------------------------------------------- /assets/cagliostro.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: unknown 3 | created_date: 2024-03-18 4 | dependencies: 5 | - Animagine XL 3.0 6 | description: An open-source, anime-themed text-to-image model enhanced to generate 7 | higher quality anime-style images with a broader range of characters from well-known 8 | anime series, an optimized dataset, and new aesthetic tags for better image creation. 9 | feedback: https://huggingface.co/cagliostrolab/animagine-xl-3.1/discussions 10 | intended_uses: Generating high-quality anime images from textual prompts. Useful 11 | for anime fans, artists, and content creators. 12 | license: Fair AI Public License 1.0-SD 13 | modality: text; image 14 | model_card: https://huggingface.co/cagliostrolab/animagine-xl-3.1 15 | monitoring: unknown 16 | name: Animagine XL 3.1 17 | nationality: USA 18 | organization: Cagliostro Research Lab 19 | prohibited_uses: Not suitable for creating realistic photos or for users who expect 20 | high-quality results from short or simple prompts. 21 | quality_control: The model undergoes pretraining, first stage finetuning, and second 22 | stage finetuning for refining and improving aspects such as hand and anatomy rendering. 23 | size: unknown 24 | training_emissions: unknown 25 | training_hardware: 2x A100 80GB GPUs 26 | training_time: Approximately 15 days, totaling over 350 GPU hours. 27 | type: model 28 | url: https://cagliostrolab.net/posts/animagine-xl-v31-release 29 | -------------------------------------------------------------------------------- /assets/cartesia.yaml: -------------------------------------------------------------------------------- 1 | - access: limited 2 | analysis: Extensive testing on Multilingual Librispeech dataset resulted in 20% 3 | lower validation perplexity. In downstream evaluations, this leads to a 2x lower 4 | word error rate and a 1 point higher quality score. Sonic also displays impressive 5 | performance metrics at inference, achieving lower latency (1.5x lower time-to-first-audio), 6 | faster inference speed (2x lower real-time factor), and higher throughput (4x). 7 | created_date: 2024-05-29 8 | dependencies: 9 | - Multilingual Librispeech dataset 10 | description: Sonic is a low-latency voice model that generates lifelike speech. 11 | Developed by Cartesia, it was designed to be an efficient real-time AI capable 12 | of processing any-sized contexts and running on any device. 13 | feedback: Contact through the provided form or via email at join@cartesia.ai. 14 | intended_uses: Sonic has potential applications across customer support, entertainment, 15 | and content creation and is a part of Cartesia's broader mission to bring real-time 16 | multimodal intelligence to every device.
17 | license: unknown 18 | modality: text; audio 19 | model_card: none 20 | monitoring: unknown 21 | name: Sonic 22 | nationality: USA 23 | organization: Cartesia 24 | prohibited_uses: unknown 25 | quality_control: '' 26 | size: unknown 27 | training_emissions: unknown 28 | training_hardware: unknown 29 | training_time: unknown 30 | type: model 31 | url: https://cartesia.ai/blog/sonic 32 | -------------------------------------------------------------------------------- /assets/casia.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: Reports results on standard translation benchmarks across 102 languages 3 | in comparison with Google Translate and ChatGPT 4 | created_date: 2023-05-29 5 | dependencies: 6 | - LLaMA 7 | - CLUE 8 | - BigTrans parallel dataset 9 | description: BigTrans is a model that adapts LLaMA, which covers only 20 languages, 10 | and enhances it with multilingual translation capability on more than 100 languages. 11 | feedback: https://huggingface.co/James-WYang/BigTrans/discussions 12 | intended_uses: Advancing future research in multilingual LLMs 13 | license: Apache 2.0 14 | modality: text; text 15 | model_card: https://huggingface.co/James-WYang/BigTrans 16 | monitoring: '' 17 | name: BigTrans 18 | nationality: China 19 | organization: Institute of Automation Chinese Academy of Sciences 20 | prohibited_uses: '' 21 | quality_control: '' 22 | size: 13B parameters (dense) 23 | training_emissions: unknown 24 | training_hardware: 16 A100 GPUs with 80 GB of RAM 25 | training_time: unknown 26 | type: model 27 | url: https://arxiv.org/pdf/2305.18098v1.pdf 28 | - access: open 29 | analysis: Evaluated on standard benchmarks for knowledge and language understanding, 30 | mathematical reasoning, and programming ability in comparison to similarly sized 31 | open-source models. 32 | created_date: 2023-12-22 33 | dependencies: [] 34 | description: YAYI 2 is an open source large language model trained in both English 35 | and Chinese. 36 | feedback: https://huggingface.co/wenge-research/yayi2-30b/discussions 37 | intended_uses: '' 38 | license: 39 | explanation: Model is under a custom [license](https://github.com/wenge-research/YAYI2/blob/main/COMMUNITY_LICENSE), 40 | while code is Apache 2.0 41 | value: custom 42 | modality: text; text 43 | model_card: https://huggingface.co/wenge-research/yayi2-30b 44 | monitoring: '' 45 | name: YAYI 2 46 | nationality: China 47 | organization: Institute of Automation Chinese Academy of Sciences 48 | prohibited_uses: '' 49 | quality_control: data is deduplicated, normalized, cleaned, and filtered for toxicity 50 | size: 30B parameters (dense) 51 | training_emissions: unknown 52 | training_hardware: over 1000 A800 GPUs 53 | training_time: unknown 54 | type: model 55 | url: https://arxiv.org/pdf/2312.14862.pdf 56 | -------------------------------------------------------------------------------- /assets/causallm.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: Evaluated on standard benchmarks across a range of tasks. 3 | created_date: 2023-10-21 4 | dependencies: 5 | - Qwen 6 | - OpenOrca 7 | - Open Platypus 8 | description: CausalLM is an LLM based on the model weights of Qwen and trained on 9 | a model architecture identical to LLaMA 2.
10 | feedback: none 11 | intended_uses: '' 12 | license: 13 | explanation: can be found at https://github.com/rpherrera/WTFPL (HuggingFace lists 14 | this to be the license) 15 | value: WTFPL 16 | modality: text; text 17 | model_card: https://huggingface.co/CausalLM/14B 18 | monitoring: unknown 19 | name: CausalLM 20 | nationality: USA 21 | organization: CausalLM 22 | prohibited_uses: '' 23 | quality_control: '' 24 | size: 14B parameters (dense) 25 | training_emissions: unknown 26 | training_hardware: unknown 27 | training_time: unknown 28 | type: model 29 | url: https://huggingface.co/CausalLM/14B 30 | -------------------------------------------------------------------------------- /assets/character.yaml: -------------------------------------------------------------------------------- 1 | - access: limited 2 | adaptation: '' 3 | created_date: 2022-09-16 4 | dependencies: [] 5 | description: Character allows users to converse with various chatbot personas. 6 | failures: unknown 7 | feedback: '' 8 | intended_uses: '' 9 | license: unknown 10 | monitoring: '' 11 | monthly_active_users: unknown 12 | name: Character 13 | nationality: USA 14 | organization: Character AI 15 | output_space: AI-generated chat conversations 16 | prohibited_uses: '' 17 | quality_control: '' 18 | terms_of_service: https://beta.character.ai/tos 19 | type: application 20 | url: https://beta.character.ai/ 21 | user_distribution: unknown 22 | -------------------------------------------------------------------------------- /assets/chatglm.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: Performance evaluated on English and Chinese language benchmark tests. 3 | created_date: 2023-03-14 4 | dependencies: [] 5 | description: ChatGLM is a Chinese-English language model with question and answer 6 | and dialogue functions, and is aimed at a Chinese audience. 7 | feedback: '' 8 | intended_uses: '' 9 | license: Apache 2.0 10 | modality: text; text 11 | model_card: '' 12 | monitoring: '' 13 | name: ChatGLM 14 | nationality: China 15 | organization: ChatGLM 16 | prohibited_uses: '' 17 | quality_control: '' 18 | size: 6B parameters (dense) 19 | training_emissions: unknown 20 | training_hardware: '' 21 | training_time: unknown 22 | type: model 23 | url: https://chatglm.cn/blog 24 | -------------------------------------------------------------------------------- /assets/cmu.yaml: -------------------------------------------------------------------------------- 1 | - access: 2 | explanation: Model checkpoints are available for download at https://github.com/VHellendoorn/Code-LMs 3 | value: open 4 | analysis: Reports results on standard code benchmarks across a variety of programming 5 | languages. 6 | created_date: 7 | explanation: The date the model paper was released 8 | value: 2022-02-26 9 | dependencies: 10 | - Github 11 | description: PolyCoder is a 2.7B parameter code model based on the GPT-2 12 | architecture, trained on 249GB of code across 12 programming languages 13 | on a single machine.
14 | feedback: https://huggingface.co/NinedayWang/PolyCoder-2.7B/discussion 15 | intended_uses: unknown 16 | license: 17 | explanation: The license is provided in the [[Github repository]](https://github.com/VHellendoorn/Code-LMs) 18 | value: MIT 19 | modality: code 20 | model_card: https://huggingface.co/NinedayWang/PolyCoder-2.7B 21 | monitoring: None 22 | name: PolyCoder 23 | nationality: USA 24 | organization: Carnegie Mellon University 25 | prohibited_uses: None 26 | quality_control: No specific quality control is mentioned in model training, though 27 | details on data processing and how the tokenizer was trained are provided in the 28 | paper. 29 | size: 2.7B parameters (dense) 30 | training_emissions: unknown 31 | training_hardware: 8 NVIDIA RTX 8000 32 | training_time: 6 weeks 33 | type: model 34 | url: https://arxiv.org/abs/2202.13169 35 | - access: open 36 | analysis: Evaluated on nascent time-series datasets and benchmarks. 37 | created_date: 2024-02-06 38 | dependencies: [] 39 | description: Moment is a family of open-source foundation models for general-purpose 40 | time-series analysis. 41 | feedback: none 42 | intended_uses: '' 43 | license: unknown 44 | modality: '' 45 | model_card: none 46 | monitoring: unknown 47 | name: Moment 48 | nationality: unknown 49 | organization: Carnegie Mellon University, University of Pennsylvania 50 | prohibited_uses: '' 51 | quality_control: '' 52 | size: 385M parameters (dense) 53 | training_emissions: unknown 54 | training_hardware: Single A6000 GPU 55 | training_time: unknown 56 | type: model 57 | url: https://arxiv.org/pdf/2402.03885.pdf 58 | -------------------------------------------------------------------------------- /assets/cognition.yaml: -------------------------------------------------------------------------------- 1 | - access: limited 2 | analysis: Evaluated on SWE-Bench, a challenging software engineering benchmark, 3 | where Devin outperforms major state of the art models unassisted. 4 | created_date: 2024-03-12 5 | dependencies: [] 6 | description: "Devin is the world\u2019s first fully autonomous AI software engineer." 7 | feedback: none 8 | intended_uses: '' 9 | license: unknown 10 | modality: text; code 11 | model_card: none 12 | monitoring: '' 13 | name: Devin 14 | nationality: USA 15 | organization: Cognition Labs 16 | prohibited_uses: '' 17 | quality_control: '' 18 | size: unknown 19 | training_emissions: unknown 20 | training_hardware: unknown 21 | training_time: unknown 22 | type: model 23 | url: https://www.cognition-labs.com/introducing-devin 24 | - access: 25 | explanation: ESM C is a drop-in replacement for previous models that provides 26 | major improvements in both capability and efficiency. ESM C models are available 27 | immediately for academic and commercial use under a new license structure designed 28 | to promote openness and enable scientists and builders. 29 | value: open 30 | analysis: Evaluations. We use the methodology of Rao et al. to measure unsupervised 31 | learning of tertiary structure in the form of contact maps. A logistic regression 32 | is used to identify contacts. For a protein of length L, we evaluate the precision 33 | of the top L contacts (P@L) with sequence separation of 6 or more residues. 34 | created_date: 2024-12-04 35 | dependencies: 36 | - UniRef 37 | - MGnify 38 | - Joint Genome Institute 39 | description: ESM Cambrian is a next generation language model trained on protein 40 | sequences at the scale of life on Earth. 
ESM C models define a new state of the 41 | art for protein representation learning. 42 | feedback: unknown 43 | intended_uses: "one that can be used by builders across a wide range of applications,\ 44 | \ to imbue AI models with a deeper understanding of the biology of life\u2019\ 45 | s most important and mysterious molecules." 46 | license: Cambrian Open License 47 | modality: 48 | explanation: 'Just as a person can fill in the blanks, such as: To __ or not to 49 | __ that is the ___ We can train language models to fill in the blanks. Except 50 | in biology, instead of training the model to predict words, we train it to predict 51 | the characters in a protein sequence, i.e. its sequence of amino acids.' 52 | value: text; text 53 | model_card: unknown 54 | monitoring: unknown 55 | name: ESM Cambrian 56 | nationality: USA 57 | organization: EvolutionaryScale 58 | prohibited_uses: unknown 59 | quality_control: ESM C was reviewed by a committee of scientific experts who concluded 60 | that the benefits of releasing the models greatly outweigh any potential risks. 61 | size: 62 | explanation: 'ESM C is trained at three scales: 300M, 600M, and 6B parameters.' 63 | value: 6B parameters 64 | training_emissions: unknown 65 | training_hardware: unknown 66 | training_time: unknown 67 | type: model 68 | url: https://www.evolutionaryscale.ai/blog/esm-cambrian 69 | -------------------------------------------------------------------------------- /assets/cognitive.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: none 3 | created_date: 2023-11-14 4 | dependencies: 5 | - Dolphin 6 | - Yi 7 | description: Dolphin 2.2 Yi is an LLM based off Yi. 8 | feedback: https://huggingface.co/cognitivecomputations/dolphin-2_2-yi-34b/discussions 9 | intended_uses: '' 10 | license: 11 | explanation: can be found at https://huggingface.co/cognitivecomputations/dolphin-2_2-yi-34b/blob/main/LICENSE 12 | value: custom 13 | modality: text; text 14 | model_card: https://huggingface.co/cognitivecomputations/dolphin-2_2-yi-34b 15 | monitoring: unknown 16 | name: Dolphin 2.2 Yi 17 | nationality: USA 18 | organization: Cognitive Computations 19 | prohibited_uses: '' 20 | quality_control: '' 21 | size: 34B parameters (dense) 22 | training_emissions: unknown 23 | training_hardware: 4 A100 GPUs 24 | training_time: 3 days 25 | type: model 26 | url: https://erichartford.com/dolphin 27 | - access: open 28 | analysis: Evaluated on OpenLLM leaderboard. 29 | created_date: 30 | explanation: release date is not published; estimated to be sometime in either 31 | May or June 2023. 32 | value: 2023-06-01 33 | dependencies: 34 | - WizardLM 35 | description: WizardLM Uncensored is WizardLM trained with a subset of the dataset 36 | - responses that contained alignment / moralizing were removed. 
37 | feedback: https://huggingface.co/cognitivecomputations/WizardLM-30B-Uncensored/discussions 38 | intended_uses: '' 39 | license: unknown 40 | modality: text; text 41 | model_card: https://huggingface.co/cognitivecomputations/WizardLM-30B-Uncensored 42 | monitoring: unknown 43 | name: WizardLM Uncensored 44 | nationality: USA 45 | organization: Cognitive Computations 46 | prohibited_uses: '' 47 | quality_control: '' 48 | size: 30B parameters (dense) 49 | training_emissions: unknown 50 | training_hardware: unknown 51 | training_time: unknown 52 | type: model 53 | url: https://huggingface.co/cognitivecomputations/WizardLM-30B-Uncensored 54 | -------------------------------------------------------------------------------- /assets/columbia.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: Evaluated on wide range of tasks using own evaluation benchmarks. 3 | created_date: 2022-11-20 4 | dependencies: 5 | - AlphaFold2 6 | - OpenProteinSet 7 | description: OpenFold is an open source recreation of AlphaFold2. 8 | feedback: '' 9 | intended_uses: '' 10 | license: CC BY 4.0 11 | modality: amino acid sequence; protein structure 12 | model_card: '' 13 | monitoring: '' 14 | name: OpenFold 15 | nationality: USA 16 | organization: Columbia 17 | prohibited_uses: '' 18 | quality_control: '' 19 | size: '' 20 | training_emissions: unknown 21 | training_hardware: Single A100 NVIDIA GPU 22 | training_time: 50,000 GPU hours 23 | type: model 24 | url: https://www.biorxiv.org/content/10.1101/2022.11.20.517210v2 25 | - access: open 26 | analysis: Evaluated on the object hallucination benchmark and compared to GPT-4V. 27 | created_date: 2023-10-11 28 | dependencies: 29 | - CLIP 30 | - Vicuna 31 | description: Ferret is a Multimodal Large Language Model (MLLM) capable of understanding 32 | spatial referring of any shape or granularity within an image and accurately grounding 33 | open-vocabulary descriptions. 34 | feedback: none 35 | intended_uses: '' 36 | license: 37 | explanation: License can be found at https://github.com/apple/ml-ferret/blob/main/LICENSE 38 | value: Apple 39 | modality: image, text; image, text 40 | model_card: none 41 | monitoring: none 42 | name: Ferret 43 | nationality: unknown 44 | organization: Columbia, Apple AI 45 | prohibited_uses: '' 46 | quality_control: '' 47 | size: 13B parameters 48 | training_emissions: unknown 49 | training_hardware: 8 A100 GPUs 50 | training_time: 2.5 to 5 days 51 | type: model 52 | url: https://arxiv.org/pdf/2310.07704.pdf 53 | -------------------------------------------------------------------------------- /assets/compute_01_research.yaml: -------------------------------------------------------------------------------- 1 | - access: 2 | explanation: to ensure AGI will be open-source, transparent, and accessible 3 | value: open 4 | analysis: '' 5 | created_date: 2024-10-11 6 | dependencies: 7 | - DeepMind's Distributed Low-Communication (DiLoCo) 8 | description: INTELLECT-1 is a decentralized training model with 10 billion parameters, 9 | designed to be open-source and transparent, allowing global contributors to participate 10 | in its computation. 11 | feedback: unknown 12 | intended_uses: Training globally distributed AI models in decentralized environments. 
13 | license: Apache 2.0 14 | modality: text; text 15 | model_card: unknown 16 | monitoring: unknown 17 | name: INTELLECT-1 18 | nationality: USA 19 | organization: PrimeIntellect 20 | prohibited_uses: '' 21 | quality_control: '' 22 | size: 23 | explanation: a 10-billion-parameter model 24 | value: 10B parameters 25 | training_emissions: unknown 26 | training_hardware: unknown 27 | training_time: unknown 28 | type: model 29 | url: https://www.primeintellect.ai/blog/intellect-1 30 | -------------------------------------------------------------------------------- /assets/continue.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | adaptation: '' 3 | created_date: 2023-07-26 4 | dependencies: 5 | - GPT-4 API 6 | - Code Llama 7 | - Claude API 8 | - WizardCoder 9 | - PaLM API 10 | description: "Continue is the open-source autopilot for software development. It\ 11 | \ is an IDE extension that brings the power of ChatGPT to VS Code and JetBrains.\ 12 | \ It\u2019s built to be deeply customizable and continuously learn from development\ 13 | \ data." 14 | failures: '' 15 | feedback: '' 16 | intended_uses: '' 17 | license: Apache 2.0 18 | monitoring: '' 19 | monthly_active_users: '' 20 | name: Continue 21 | nationality: USA 22 | organization: Continue Dev, Inc. 23 | output_space: '' 24 | prohibited_uses: '' 25 | quality_control: '' 26 | terms_of_service: https://github.com/continuedev/continue/blob/main/LICENSE 27 | type: application 28 | url: https://continue.dev 29 | user_distribution: '' 30 | -------------------------------------------------------------------------------- /assets/cresta.yaml: -------------------------------------------------------------------------------- 1 | - access: closed 2 | analysis: Outperforms GPT-4 in common sense and reasoning tasks on the basis of 3 | both efficiency and accuracy. 4 | created_date: 2023-06-20 5 | dependencies: 6 | - GPT-4 7 | - Claude 8 | - Falcon-40B 9 | description: Ocean-1 is the culmination of Cresta's experience in deploying generative 10 | AI systems for large enterprises and signifies their latest milestone in advancing 11 | the cutting edge AI technology for customer facing conversations. 12 | feedback: none 13 | intended_uses: Acting as a contact center chatbot agent. 14 | license: unknown 15 | modality: text; text 16 | model_card: none 17 | monitoring: unknown 18 | name: Ocean-1 19 | nationality: USA 20 | organization: Cresta 21 | prohibited_uses: none 22 | quality_control: '' 23 | size: 7B parameters (dense) 24 | training_emissions: unknown 25 | training_hardware: unknown 26 | training_time: unknown 27 | type: model 28 | url: https://cresta.com/blog/introducing-ocean-1-worlds-first-contact-center-foundation-model/ 29 | -------------------------------------------------------------------------------- /assets/deci.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: Evaluated on the OpenLLM benchmarks and, on release, outperforms all other 3 | 7B models on the OpenLLM Leaderboard. 4 | created_date: 2023-12-12 5 | dependencies: [] 6 | description: DeciLM is a LLM that on release ranks as the fastest and most accurate 7 | model of its size. 8 | feedback: none 9 | intended_uses: This model is intended for commercial and research use in English 10 | and can be fine-tuned for use in other languages. 
11 | license: Apache 2.0 12 | modality: text; text 13 | model_card: https://deci.ai/model-zoo/decilm-7b/ 14 | monitoring: unknown 15 | name: DeciLM 16 | nationality: Israel 17 | organization: Deci 18 | prohibited_uses: '' 19 | quality_control: '' 20 | size: 7B parameters (dense) 21 | training_emissions: unknown 22 | training_hardware: NVIDIA A10 GPUs 23 | training_time: unknown 24 | type: model 25 | url: https://deci.ai/blog/introducing-decilm-7b-the-fastest-and-most-accurate-7b-large-language-model-to-date 26 | -------------------------------------------------------------------------------- /assets/deepnight.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: none 3 | created_date: 2023-11-04 4 | dependencies: [] 5 | description: SaiLy is a series/collection of AI Models by Deepnight Research which 6 | are highly experimental and uncensored. 7 | feedback: https://huggingface.co/deepnight-research/saily_100b/discussions 8 | intended_uses: '' 9 | license: MIT 10 | modality: text; text 11 | model_card: https://huggingface.co/deepnight-research/saily_100b 12 | monitoring: unknown 13 | name: SaiLY 14 | nationality: International 15 | organization: Deepnight Research 16 | prohibited_uses: '' 17 | quality_control: '' 18 | size: 100B parameters (dense) 19 | training_emissions: unknown 20 | training_hardware: unknown 21 | training_time: unknown 22 | type: model 23 | url: https://huggingface.co/deepnight-research/saily_100b 24 | -------------------------------------------------------------------------------- /assets/deepseek.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: Deepseek and baseline models (for comparison) evaluated on a series of 3 | representative benchmarks, both in English and Chinese. 4 | created_date: 2023-11-28 5 | dependencies: [] 6 | description: Deepseek is a 67B parameter model with Grouped-Query Attention trained 7 | on 2 trillion tokens from scratch. 8 | feedback: https://huggingface.co/deepseek-ai/deepseek-llm-67b-base/discussions 9 | intended_uses: '' 10 | license: 11 | explanation: Model license can be found at https://github.com/deepseek-ai/DeepSeek-LLM/blob/main/LICENSE-MODEL. 12 | Code license is under MIT 13 | value: custom 14 | modality: text; text 15 | model_card: https://huggingface.co/deepseek-ai/deepseek-llm-67b-base 16 | monitoring: unknown 17 | name: Deepseek 18 | nationality: USA 19 | organization: Deepseek AI 20 | prohibited_uses: none 21 | quality_control: Training dataset comprised of diverse data composition and pruned 22 | and deduplicated. 23 | size: 67B parameters (dense) 24 | training_emissions: unknown 25 | training_hardware: unknown 26 | training_time: unknown 27 | type: model 28 | url: https://github.com/deepseek-ai/DeepSeek-LLM 29 | - access: open 30 | analysis: Deepseek and baseline models (for comparison) evaluated on a series of 31 | representative benchmarks, both in English and Chinese. 32 | created_date: 2023-11-29 33 | dependencies: 34 | - Deepseek 35 | description: Deepseek Chat is a 67B parameter model initialized from Deepseek and 36 | fine-tuned on extra instruction data. 37 | feedback: https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat/discussions 38 | intended_uses: '' 39 | license: 40 | explanation: Model license can be found at https://github.com/deepseek-ai/DeepSeek-LLM/blob/main/LICENSE-MODEL. 
41 | Code license is under MIT 42 | value: custom 43 | modality: text; text 44 | model_card: https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat 45 | monitoring: unknown 46 | name: Deepseek Chat 47 | nationality: USA 48 | organization: Deepseek AI 49 | prohibited_uses: none 50 | quality_control: Training dataset comprised of diverse data composition and pruned 51 | and deduplicated. 52 | size: 67B parameters (dense) 53 | training_emissions: unknown 54 | training_hardware: unknown 55 | training_time: unknown 56 | type: model 57 | url: https://github.com/deepseek-ai/DeepSeek-LLM 58 | - access: open 59 | analysis: Evaluated on code generation, code completion, cross-file code completion, 60 | and program-based math reasoning across standard benchmarks. 61 | created_date: 2023-11-03 62 | dependencies: [] 63 | description: Deepseek Coder is composed of a series of code language models, each 64 | trained from scratch on 2T tokens, with a composition of 87% code and 13% natural 65 | language in both English and Chinese. 66 | feedback: https://huggingface.co/deepseek-ai/deepseek-coder-33b-base/discussions 67 | intended_uses: '' 68 | license: 69 | explanation: Model license can be found at https://github.com/deepseek-ai/DeepSeek-Coder/blob/main/LICENSE-MODEL. 70 | Code license is under MIT 71 | value: custom 72 | modality: text; code 73 | model_card: https://huggingface.co/deepseek-ai/deepseek-coder-33b-base 74 | monitoring: unknown 75 | name: Deepseek Coder 76 | nationality: USA 77 | organization: Deepseek AI 78 | prohibited_uses: '' 79 | quality_control: '' 80 | size: 33B parameters (dense) 81 | training_emissions: unknown 82 | training_hardware: 8 NVIDIA A100 GPUs and 8 NVIDIA H800 GPUs 83 | training_time: unknown 84 | type: model 85 | url: https://github.com/deepseek-ai/DeepSeek-Coder 86 | -------------------------------------------------------------------------------- /assets/dibt.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: none 3 | created_date: 2024-02-27 4 | datasheet: https://huggingface.co/datasets/DIBT/10k_prompts_ranked 5 | dependencies: [] 6 | description: 10k_prompts_ranked is a dataset of prompts with quality rankings created 7 | by 314 members of the open-source ML community using Argilla, an open-source tool 8 | to label data. 9 | excluded: '' 10 | feedback: https://huggingface.co/datasets/DIBT/10k_prompts_ranked/discussions 11 | included: '' 12 | intended_uses: Training and evaluating language models on prompt ranking tasks and 13 | as a dataset that can be filtered only to include high-quality prompts. These 14 | can serve as seed data for generating synthetic prompts and generations. 15 | license: unknown 16 | modality: text 17 | monitoring: '' 18 | name: 10k_prompts_ranked 19 | nationality: USA 20 | organization: Data is Better Together 21 | prohibited_uses: This dataset only contains rankings for prompts, not prompt/response 22 | pairs, so it is not suitable for direct use for supervised fine-tuning of language 23 | models.
24 | quality_control: '' 25 | sample: [] 26 | size: 10k examples 27 | type: dataset 28 | url: https://huggingface.co/blog/community-datasets 29 | -------------------------------------------------------------------------------- /assets/duckduckgo.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | adaptation: '' 3 | created_date: 2023-03-08 4 | dependencies: 5 | - Anthropic API 6 | description: The first Instant Answer in DuckDuckGo search results to use natural 7 | language technology to generate answers to search queries using Wikipedia and 8 | other related sources 9 | failures: '' 10 | feedback: '' 11 | intended_uses: '' 12 | license: unknown 13 | monitoring: '' 14 | monthly_active_users: '' 15 | name: DuckAssist 16 | nationality: USA 17 | organization: DuckDuckGo 18 | output_space: '' 19 | prohibited_uses: '' 20 | quality_control: '' 21 | terms_of_service: '' 22 | type: application 23 | url: https://spreadprivacy.com/duckassist-launch/ 24 | user_distribution: '' 25 | -------------------------------------------------------------------------------- /assets/duolingo.yaml: -------------------------------------------------------------------------------- 1 | - access: limited 2 | adaptation: '' 3 | created_date: 2023-03-14 4 | dependencies: 5 | - GPT-4 API 6 | description: Explain My Answer offers learners the chance to learn more about their 7 | response in a lesson (whether their answer was correct or incorrect!) By tapping 8 | a button after certain exercise types, learners can enter a chat with Duo to get 9 | a simple explanation on why their answer was right or wrong, and ask for examples 10 | or further clarification. 11 | failures: '' 12 | feedback: '' 13 | intended_uses: '' 14 | license: 15 | explanation: 'Users are granted a limited license per Duolingo Terms. 16 | 17 | ' 18 | value: custom 19 | monitoring: '' 20 | monthly_active_users: '' 21 | name: Duolingo Explain My Answer 22 | nationality: USA 23 | organization: Duolingo 24 | output_space: '' 25 | prohibited_uses: '' 26 | quality_control: '' 27 | terms_of_service: https://www.duolingo.com/terms 28 | type: application 29 | url: https://blog.duolingo.com/duolingo-max/ 30 | user_distribution: '' 31 | - access: limited 32 | adaptation: '' 33 | created_date: 2023-03-14 34 | dependencies: 35 | - Duolingo Role Play 36 | - Duolingo Explain My Answer 37 | description: Duolingo Max is a new subscription tier above Super Duolingo that gives 38 | learners access to two brand-new features and exercises - Explain My Answer and 39 | Roleplay. 40 | failures: '' 41 | feedback: '' 42 | intended_uses: '' 43 | license: 44 | explanation: 'Users are granted a limited license per Duolingo Terms. 45 | 46 | ' 47 | value: custom 48 | monitoring: '' 49 | monthly_active_users: '' 50 | name: Duolingo Max 51 | nationality: USA 52 | organization: Duolingo 53 | output_space: '' 54 | prohibited_uses: '' 55 | quality_control: '' 56 | terms_of_service: '' 57 | type: application 58 | url: https://blog.duolingo.com/duolingo-max/ 59 | user_distribution: '' 60 | - access: limited 61 | adaptation: '' 62 | created_date: 2023-03-14 63 | dependencies: 64 | - GPT-4 API 65 | description: "Roleplay allows learners to practice real-world conversation skills\ 66 | \ with world characters in the app. These challenges, which earn XP, will live\ 67 | \ alongside the path as one of the \u201CSide Quests\u201D learners can access\ 68 | \ by tapping on the character. What will you talk about? 
We\u2019ll guide you\ 69 | \ through different scenarios! Learners might discuss future vacation plans with\ 70 | \ Lin, order coffee at a caf\xE9 in Paris, go furniture shopping with Eddy, or\ 71 | \ ask a friend to go for a hike." 72 | failures: '' 73 | feedback: '' 74 | intended_uses: '' 75 | license: 76 | explanation: '"Subject to the terms of these Terms and Conditions, Duolingo grants 77 | you a non-transferable, non-exclusive license to download, install, and use 78 | one copy of each App in object code form only on an interactive wireless device 79 | that you own or control" - excerpt from the Terms of Service document. 80 | 81 | ' 82 | value: custom 83 | monitoring: '' 84 | monthly_active_users: '' 85 | name: Duolingo Role Play 86 | nationality: USA 87 | organization: Duolingo 88 | output_space: '' 89 | prohibited_uses: '' 90 | quality_control: '' 91 | terms_of_service: https://www.duolingo.com/terms 92 | type: application 93 | url: https://blog.duolingo.com/duolingo-max/ 94 | user_distribution: '' 95 | -------------------------------------------------------------------------------- /assets/ens.yaml: -------------------------------------------------------------------------------- 1 | - access: 2 | explanation: Dataset, evaluation code and models are publicly available at the 3 | [[HowTo100M dataset webpage]](https://www.di.ens.fr/willow/research/howto100m/). 4 | value: open 5 | analysis: 6 | explanation: See [[Experiments]](https://arxiv.org/pdf/1906.03327v2.pdf#section.5) 7 | value: Authors use the dataset to learn a joint text-video embedding by leveraging 8 | more than 130M video clip-caption pairs. They then evaluate the learned embeddings 9 | on the tasks of localizing steps in instructional videos of CrossTask and textbased 10 | video retrieval on YouCook2, MSR-VTT and LSMDC datasets. They show that their 11 | learned embedding can perform better compared to models trained on existing 12 | carefully annotated but smaller video description datasets. 13 | created_date: 14 | explanation: The date the [[paper]](https://arxiv.org/abs/1906.03327) was submitted. 15 | value: 2019-06-07 16 | datasheet: https://arxiv.org/pdf/1906.03327.pdf 17 | dependencies: 18 | - YouTube 19 | description: HowTo100M is a large-scale dataset of narrated videos with an emphasis 20 | on instructional videos where content creators teach complex tasks with an explicit 21 | intention of explaining the visual content on screen. HowTo100M features a total 22 | of 136M video clips with captions sourced from 1.2M Youtube videos (15 years of 23 | video) and 23k activities from domains such as cooking, hand crafting, personal 24 | care, gardening or fitness. 25 | excluded: 26 | explanation: See [[Data collection]](https://arxiv.org/pdf/1906.03327v2.pdf#subsection.3.1) 27 | value: Categories such as Relationships and Finance and Business, that may be 28 | more abstract, are excluded. Videos with less than 100 views are removed. Authors 29 | also ignore videos that have less than 100 words. Videos longer than 2,000 seconds 30 | are removed. As some videos may appear in several tasks, the videos are deduplicated 31 | based on YouTube IDs. 32 | feedback: '' 33 | included: 34 | explanation: See [[Data collection]](https://arxiv.org/pdf/1906.03327v2.pdf#subsection.3.1) 35 | value: The dataset features 1.22 million videos from YouTube with a primary focus 36 | on videos containing "visual tasks", that involve some interaction with the 37 | physical world (e.g. 
Making peanut butter, Pruning a tree) as compared to others 38 | that are more abstract (e.g. Ending a toxic relationship, Choosing a gift). 39 | To obtain predominantly visual tasks, the authors limit them to one of 12 categories 40 | (Food and Entertaining, Home and Garden, Hobbies and Crafts, Cars & Other Vehicles, 41 | Pets and Animals, Holidays and Traditions, Personal Care and Style, Sports and 42 | Fitness, Health, Education and Communications, Arts and Entertainment, Computers 43 | and Electronics). They also restrict to the top 200 YouTube search results, 44 | as the latter ones may not be related to the query task. 45 | intended_uses: '' 46 | license: Apache 2.0 47 | modality: text, video 48 | monitoring: '' 49 | name: HowTo100M 50 | nationality: unknown 51 | organization: "\xC9cole Normale Sup\xE9rieure, Inria" 52 | prohibited_uses: 53 | explanation: See [[HowTo100M dataset webpage]](https://www.di.ens.fr/willow/research/howto100m/) 54 | value: 'No uses are explicitly prohibited by the authors. They note the following 55 | limitations of the dataset: "We note that the distribution of identities and 56 | activities in the HowTo100M dataset may not be representative of the global 57 | human population and the diversity in society. Please be careful of unintended 58 | societal, gender, racial and other biases when training or deploying models 59 | trained on this data." 60 | 61 | ' 62 | quality_control: '' 63 | sample: [] 64 | size: 136M video clips 65 | type: dataset 66 | url: https://arxiv.org/pdf/1906.03327.pdf 67 | -------------------------------------------------------------------------------- /assets/epfl.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: Evaluated on TruthfulQA as main evaluation benchmark. 3 | created_date: 2023-11-27 4 | dependencies: [] 5 | description: Meditron is a large-scale medical LLM that remains open-source. 6 | feedback: https://huggingface.co/epfl-llm/meditron-7b/discussions 7 | intended_uses: Medical exam question answering, supporting differential diagnosis, 8 | disease information. 9 | license: LLaMA 2 10 | modality: text; text 11 | model_card: https://huggingface.co/epfl-llm/meditron-70b 12 | monitoring: none 13 | name: MediTron 14 | nationality: unknown 15 | organization: EPFL, Idiap Research Institute, OpenAssistant, Yale 16 | prohibited_uses: '' 17 | quality_control: '' 18 | size: 70B parameters (dense) 19 | training_emissions: unknown 20 | training_hardware: unknown 21 | training_time: unknown 22 | type: model 23 | url: https://arxiv.org/pdf/2311.16079.pdf 24 | -------------------------------------------------------------------------------- /assets/evolutionaryscale.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: The model was tested in the generation of a new green fluorescent protein. 3 | Its effectiveness was compared to natural evolutionary processes, and it was deemed 4 | to simulate over 500 million years of evolution. 5 | created_date: 2024-06-25 6 | dependencies: [] 7 | description: ESM3 is the first generative model for biology that simultaneously 8 | reasons over the sequence, structure, and function of proteins. It is trained 9 | across the natural diversity of Earth, reasoning over billions of proteins from 10 | diverse environments. It advances the ability to program and create with the code 11 | of life, simulating evolution, and making biology programmable. 
ESM3 is generative, 12 | and scientists can guide the model to create proteins for various applications. 13 | feedback: Unknown 14 | intended_uses: To engineer biology from first principles. It functions as a tool 15 | for scientists to create proteins for various applications, including medicine, 16 | biology research, and clean energy. 17 | license: Unknown 18 | modality: text; image, text 19 | model_card: unknown 20 | monitoring: Unknown; specific measures are not specified. 21 | name: ESM3 22 | nationality: USA 23 | organization: EvolutionaryScale 24 | prohibited_uses: Unknown 25 | quality_control: The creators have put in place a responsible development framework 26 | to ensure transparency and accountability from the start. ESM3 was tested in the 27 | generation of a new protein, ensuring its quality and effectiveness. 28 | size: 98B parameters (dense) 29 | training_emissions: Unknown 30 | training_hardware: unknown 31 | training_time: Unknown 32 | type: model 33 | url: https://www.evolutionaryscale.ai/blog/esm3-release 34 | -------------------------------------------------------------------------------- /assets/faraday.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | adaptation: '' 3 | created_date: 4 | explanation: The date Faraday Lab's Hugging Face card was last updated. Date for 5 | ARES release is not published on site. 6 | value: 2023-04-26 7 | dependencies: 8 | - Stable Diffusion 9 | description: ARES is a text-to-image generator based on Stable Diffusion. The goal 10 | is to provide a simple tool with a user interface allowing mainstream AI access 11 | for artists and creators. 12 | failures: '' 13 | feedback: '' 14 | intended_uses: '' 15 | license: unknown 16 | monitoring: '' 17 | monthly_active_users: '' 18 | name: ARES 19 | nationality: USA 20 | organization: Faraday Lab 21 | output_space: generated images 22 | prohibited_uses: '' 23 | quality_control: '' 24 | terms_of_service: '' 25 | type: application 26 | url: https://faradaylab.fr/ 27 | user_distribution: '' 28 | -------------------------------------------------------------------------------- /assets/fuse.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: The FuseChat model was evaluated on MT-Bench, which comprises 80 multi-turn 3 | dialogues spanning writing, roleplay, reasoning, math, coding, stem, and humanities 4 | domains. It yields an average performance of 66.52, with specific scores for individual 5 | domains available in the leaderboard results. 6 | created_date: 2024-02-26 7 | dependencies: 8 | - Nous Hermes 2 9 | - OpenChat 3.5 10 | description: FuseChat is a powerful chat large language model (LLM) that integrates 11 | multiple structure- and scale-varied chat LLMs using a fuse-then-merge strategy. 12 | The fusion is done in two stages. 13 | feedback: https://huggingface.co/FuseAI/FuseChat-7B-VaRM/discussions 14 | intended_uses: FuseChat is intended to be used as a powerful chatbot that takes 15 | in text inputs and provides text-based responses. It can be utilized in a variety 16 | of domains including writing, roleplay, reasoning, math, coding, stem, and humanities.
17 | license: Apache 2.0 18 | modality: text; text 19 | model_card: https://huggingface.co/FuseAI/FuseChat-7B-VaRM 20 | monitoring: unknown 21 | name: FuseChat 22 | nationality: USA 23 | organization: FuseAI 24 | prohibited_uses: unknown 25 | quality_control: none 26 | size: 7B parameters 27 | training_emissions: unknown 28 | training_hardware: unknown 29 | training_time: unknown 30 | type: model 31 | url: https://arxiv.org/abs/2402.16107 32 | -------------------------------------------------------------------------------- /assets/genmo.yaml: -------------------------------------------------------------------------------- 1 | - access: 2 | explanation: open state-of-the-art video generation model... The weights and architecture 3 | for Mochi 1 are open 4 | value: open 5 | analysis: Mochi 1 sets a new best-in-class standard for open-source video generation. 6 | It also performs very competitively with the leading closed models... We benchmark 7 | prompt adherence with an automated metric using a vision language model as a judge 8 | following the protocol in OpenAI DALL-E 3. We evaluate generated videos using 9 | Gemini-1.5-Pro-002. 10 | created_date: 2025-01-14 11 | dependencies: 12 | - DDPM 13 | - DreamFusion 14 | - Emu Video 15 | - T5-XXL 16 | description: Mochi 1 is an open-source video generation model designed to produce 17 | high-fidelity motion and strong prompt adherence in generated videos, setting 18 | a new standard for open video generation systems. 19 | feedback: unknown 20 | intended_uses: Advance the field of video generation and explore new methodologies. 21 | Build innovative applications in entertainment, advertising, education, and more. 22 | Empower artists and creators to bring their visions to life with AI-generated 23 | videos. Generate synthetic data for training AI models in robotics, autonomous 24 | vehicles and virtual environments. 25 | license: 26 | explanation: We're releasing the model under a permissive Apache 2.0 license. 27 | value: Apache 2.0 28 | modality: 29 | explanation: Mochi 1 generates smooth videos... Measures how accurately generated 30 | videos follow the provided textual instructions 31 | value: text; video 32 | model_card: unknown 33 | monitoring: unknown 34 | name: Mochi 1 35 | nationality: USA 36 | organization: Genmo 37 | prohibited_uses: unknown 38 | quality_control: robust safety moderation protocols in the playground to ensure 39 | that all video generations remain safe and aligned with ethical guidelines. 40 | size: 41 | explanation: featuring a 10 billion parameter diffusion model 42 | value: 10B parameters 43 | training_emissions: unknown 44 | training_hardware: unknown 45 | training_time: unknown 46 | type: model 47 | url: https://www.genmo.ai/blog 48 | -------------------------------------------------------------------------------- /assets/glm.yaml: -------------------------------------------------------------------------------- 1 | - access: Open 2 | analysis: Evaluations show that GLM-4, 1) closely rivals or outperforms GPT-4 in 3 | terms of general metrics such as MMLU, GSM8K, MATH, BBH, GPQA, and HumanEval, 4 | 2) gets close to GPT-4-Turbo in instruction following as measured by IFEval, 3) 5 | matches GPT-4 Turbo (128K) and Claude 3 for long context tasks, and 4) outperforms 6 | GPT-4 in Chinese alignments as measured by AlignBench. 7 | created_date: 2023-07-02 8 | dependencies: [] 9 | description: ChatGLM is an evolving family of large language models that have been 10 | developed over time. 
The GLM-4 language series includes GLM-4, GLM-4-Air, and 11 | GLM-4-9B. They are pre-trained on ten trillion tokens, mostly in Chinese and 12 | English, and are aligned primarily for Chinese and English usage. The high-quality 13 | alignment is achieved via a multi-stage post-training process, which involves 14 | supervised fine-tuning and learning from human feedback. The GLM-4 All Tools model 15 | is further aligned to understand user intent and autonomously decide when and 16 | which tool(s) to use. 17 | feedback: Unknown 18 | intended_uses: General language modeling, complex tasks like accessing online information 19 | via web browsing and solving math problems using a Python interpreter. 20 | license: Apache 2.0 21 | modality: text; text 22 | model_card: https://huggingface.co/THUDM/glm-4-9b 23 | monitoring: Unknown 24 | name: ChatGLM 25 | nationality: unknown 26 | organization: Team GLM, Zhipu AI, Tsinghua University 27 | prohibited_uses: Unknown 28 | quality_control: High-quality alignment is achieved via a multi-stage post-training 29 | process, which involves supervised fine-tuning and learning from human feedback. 30 | size: 9B parameters 31 | training_emissions: Unknown 32 | training_hardware: Unknown 33 | training_time: Unknown 34 | type: model 35 | url: https://arxiv.org/pdf/2406.12793 36 | -------------------------------------------------------------------------------- /assets/greenbit.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: Evaluated on common LLM benchmarks. 3 | created_date: 2023-09-29 4 | dependencies: 5 | - LLaMA 6 | description: GreenBit LLaMA is a series of fine-tuned LLaMA models. 7 | feedback: https://huggingface.co/GreenBitAI/LLaMA-30B-2bit-groupsize8/discussions 8 | intended_uses: '' 9 | license: Apache 2.0 10 | modality: text; text 11 | model_card: https://huggingface.co/GreenBitAI/LLaMA-30B-2bit-groupsize8 12 | monitoring: unknown 13 | name: GreenBit LLaMA 14 | nationality: USA 15 | organization: GreenBit AI 16 | prohibited_uses: '' 17 | quality_control: '' 18 | size: 30B parameters (dense) 19 | training_emissions: unknown 20 | training_hardware: unknown 21 | training_time: unknown 22 | type: model 23 | url: https://github.com/GreenBitAI/low_bit_llama 24 | -------------------------------------------------------------------------------- /assets/h2oai.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: Evaluated on the EleutherAI evaluation harness. 3 | created_date: 2023-06-16 4 | dependencies: 5 | - GPT-NeoX 6 | - H2O AI OpenAssistant 7 | - h2oGPT Repositories 8 | description: Series of models fine-tuned on well-known LLMs using the h2oGPT repositories. 9 | feedback: https://huggingface.co/h2oai/h2ogpt-oasst1-512-20b/discussions 10 | intended_uses: '' 11 | license: Apache 2.0 12 | modality: text; text 13 | model_card: https://huggingface.co/h2oai/h2ogpt-oasst1-512-20b 14 | monitoring: '' 15 | name: h2oGPT 16 | nationality: USA 17 | organization: H2O AI 18 | prohibited_uses: '' 19 | quality_control: '' 20 | size: 20B parameters (dense) 21 | training_emissions: unknown 22 | training_hardware: unspecified number of 48GB A100 NVIDIA GPUs 23 | training_time: unknown 24 | type: model 25 | url: https://arxiv.org/pdf/2306.08161.pdf 26 | - access: open 27 | analysis: Evaluated on common sense and world knowledge benchmarks.
28 | created_date: 2024-01-30 29 | dependencies: [] 30 | description: H2O Danube is a language model trained on 1T tokens following the core 31 | principles of LLaMA 2 and Mistral. 32 | feedback: https://huggingface.co/h2oai/h2o-danube-1.8b-base/discussions 33 | intended_uses: '' 34 | license: Apache 2.0 35 | modality: text; text 36 | model_card: https://huggingface.co/h2oai/h2o-danube-1.8b-base 37 | monitoring: unknown 38 | name: H2O Danube 39 | nationality: USA 40 | organization: H2O AI 41 | prohibited_uses: Users are encouraged to use the large language model responsibly 42 | and ethically. By using this model, you agree not to use it for purposes that 43 | promote hate speech, discrimination, harassment, or any form of illegal or harmful 44 | activities. 45 | quality_control: unknown 46 | size: 1.8B parameters (dense) 47 | training_emissions: unknown 48 | training_hardware: 8x H100 GPUs on a single node 49 | training_time: unknown 50 | type: model 51 | url: https://arxiv.org/pdf/2401.16818.pdf 52 | -------------------------------------------------------------------------------- /assets/hubspot.yaml: -------------------------------------------------------------------------------- 1 | - access: limited 2 | adaptation: '' 3 | created_date: 2023-01-31 4 | dependencies: 5 | - ChatGPT API 6 | description: Give your sales, marketing, and customer service teams one of the most 7 | powerful AI tools available - ChatGPT priority access, no timeout limits, company 8 | wide access managed through a single account, incorporate into your existing processes 9 | without leaving HubSpot 10 | failures: '' 11 | feedback: '' 12 | intended_uses: '' 13 | license: 14 | explanation: 'We weren''t able to locate a license. 15 | 16 | ' 17 | value: unknown 18 | monitoring: '' 19 | monthly_active_users: '' 20 | name: ChatGPT powered by OBO 21 | nationality: USA 22 | organization: HubSpot 23 | output_space: '' 24 | prohibited_uses: '' 25 | quality_control: '' 26 | terms_of_service: '' 27 | type: application 28 | url: https://ecosystem.hubspot.com/marketplace/apps/sales/sales-enablement/the-obo-group-chatgpt-1398072 29 | user_distribution: '' 30 | -------------------------------------------------------------------------------- /assets/idea.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: Evaluated on human and machine benchmarks in comparison to established 3 | image models as a baseline. 4 | created_date: 2024-01-26 5 | dependencies: 6 | - CLIP 7 | - LAION-400M 8 | - Wukong 9 | - Stable Diffusion XL 10 | description: Taiyi Diffusion XL is a new Chinese and English bilingual text-to-image 11 | model which is developed by extending the capabilities of CLIP and Stable-DiffusionXL. 
12 | feedback: https://huggingface.co/IDEA-CCNL/Taiyi-Stable-Diffusion-XL-3.5B/discussions 13 | intended_uses: '' 14 | license: Apache 2.0 15 | modality: text; image 16 | model_card: https://huggingface.co/IDEA-CCNL/Taiyi-Stable-Diffusion-XL-3.5B 17 | monitoring: unknown 18 | name: Taiyi Diffusion XL 19 | nationality: unknown 20 | organization: International Digital Economy Academy, South China University of Technology, 21 | University of Science and Technology of China 22 | prohibited_uses: '' 23 | quality_control: unknown 24 | size: 3.5B parameters (dense) 25 | training_emissions: unknown 26 | training_hardware: unknown 27 | training_time: unknown 28 | type: model 29 | url: https://arxiv.org/pdf/2401.14688.pdf 30 | -------------------------------------------------------------------------------- /assets/ideogram.yaml: -------------------------------------------------------------------------------- 1 | - access: limited 2 | analysis: "Compared to DALL\xB7E 3 based on a qualitative user comparison." 3 | created_date: 2024-02-28 4 | dependencies: [] 5 | description: "Ideogram 1.0 is Ideogram\u2019s most advanced text-to-image model,\ 6 | \ as of release." 7 | feedback: none 8 | intended_uses: '' 9 | license: unknown 10 | modality: text; image 11 | model_card: none 12 | monitoring: '' 13 | name: Ideogram 1.0 14 | nationality: USA 15 | organization: Ideogram AI 16 | prohibited_uses: '' 17 | quality_control: '' 18 | size: unknown 19 | training_emissions: unknown 20 | training_hardware: unknown 21 | training_time: unknown 22 | type: model 23 | url: https://about.ideogram.ai/1.0 24 | -------------------------------------------------------------------------------- /assets/instacart.yaml: -------------------------------------------------------------------------------- 1 | - access: limited 2 | adaptation: '' 3 | created_date: 2023-03-01 4 | dependencies: 5 | - ChatGPT API 6 | description: "Instacart is augmenting the Instacart app to enable customers to ask\ 7 | \ about food and get inspirational, shoppable answers. This uses ChatGPT alongside\ 8 | \ Instacart\u2019s own AI and product data from their 75,000+ retail partner store\ 9 | \ locations to help customers discover ideas for open-ended shopping goals, such\ 10 | \ as \u201CHow do I make great fish tacos?\u201D or \u201CWhat\u2019s a healthy\ 11 | \ lunch for my kids?\u201D Instacart plans to launch \u201CAsk Instacart\u201D\ 12 | \ later this year." 13 | failures: '' 14 | feedback: '' 15 | intended_uses: '' 16 | license: '' 17 | monitoring: '' 18 | monthly_active_users: '' 19 | name: Ask Instacart 20 | nationality: USA 21 | organization: Instacart 22 | output_space: '' 23 | prohibited_uses: '' 24 | quality_control: '' 25 | terms_of_service: '' 26 | type: application 27 | url: https://openai.com/blog/introducing-chatgpt-and-whisper-apis 28 | user_distribution: '' 29 | -------------------------------------------------------------------------------- /assets/internlm.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: Evaluated on the dimensions proposed by OpenCompass in comparison to other 3 | LLMs. 4 | created_date: 2023-09-20 5 | dependencies: [] 6 | description: InternLM is an LLM pre-trained on over 2.3T Tokens containing high-quality 7 | English, Chinese, and code data. 
8 | feedback: https://huggingface.co/internlm/internlm-20b/discussions 9 | intended_uses: '' 10 | license: Apache 2.0 11 | modality: text; text 12 | model_card: https://huggingface.co/internlm/internlm-20b 13 | monitoring: unknown 14 | name: InternLM 15 | nationality: China 16 | organization: InternLM 17 | prohibited_uses: '' 18 | quality_control: '' 19 | size: 7B parameters (dense) 20 | training_emissions: unknown 21 | training_hardware: unknown 22 | training_time: unknown 23 | type: model 24 | url: https://github.com/InternLM/InternLM 25 | -------------------------------------------------------------------------------- /assets/juni.yaml: -------------------------------------------------------------------------------- 1 | - access: limited 2 | adaptation: '' 3 | created_date: unknown 4 | dependencies: 5 | - Anthropic API 6 | description: An online tutoring solution to help students achieve academic success. 7 | failures: '' 8 | feedback: '' 9 | intended_uses: '' 10 | license: unknown 11 | monitoring: '' 12 | monthly_active_users: '' 13 | name: Juni Tutor Bot 14 | nationality: USA 15 | organization: Juni Learning 16 | output_space: '' 17 | prohibited_uses: '' 18 | quality_control: '' 19 | terms_of_service: '' 20 | type: application 21 | url: https://junilearning.com/ 22 | user_distribution: '' 23 | -------------------------------------------------------------------------------- /assets/kaist.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: Evaluated in comparison to the MusicCaps dataset and with respect to n-gram, 3 | neural metrics. 4 | created_date: 2023-07-31 5 | datasheet: none 6 | dependencies: 7 | - MusicCaps 8 | - Million Song Dataset 9 | - Magnatagtune 10 | description: LP-MusicCaps is a LLM-based pseudo music caption dataset. 11 | excluded: '' 12 | feedback: '' 13 | included: '' 14 | intended_uses: '' 15 | license: CC-BY-NC-4.0 16 | modality: {} 17 | monitoring: '' 18 | name: LP-MusicCaps 19 | nationality: South Korea 20 | organization: South Korea Graduate School of Culture Technology 21 | prohibited_uses: '' 22 | quality_control: '' 23 | sample: [] 24 | size: 2.2M captions paired with 0.5M audio clips 25 | type: dataset 26 | url: https://arxiv.org/pdf/2307.16372.pdf 27 | -------------------------------------------------------------------------------- /assets/khan.yaml: -------------------------------------------------------------------------------- 1 | - access: limited 2 | adaptation: '' 3 | created_date: 2023-03-14 4 | dependencies: 5 | - GPT-4 API 6 | description: An AI-powered assistant that functions as both a virtual tutor for 7 | students and a classroom assistant for teachers. 8 | failures: '' 9 | feedback: '' 10 | intended_uses: '' 11 | license: unknown 12 | monitoring: '' 13 | monthly_active_users: '' 14 | name: Khanmigo 15 | nationality: USA 16 | organization: Khan Academy 17 | output_space: '' 18 | prohibited_uses: '' 19 | quality_control: '' 20 | terms_of_service: '' 21 | type: application 22 | url: https://www.khanacademy.org/khan-labs#khanmigo 23 | user_distribution: '' 24 | -------------------------------------------------------------------------------- /assets/konan.yaml: -------------------------------------------------------------------------------- 1 | - access: limited 2 | analysis: none 3 | created_date: 2023-09-17 4 | dependencies: [] 5 | description: Konan LLM is a Large Language Model developed in-house by Konan Technology. 
6 | Optimized for super-large AI training, it leverages high-quality, large-scale 7 | data and over 20 years of expertise in natural language processing. 8 | feedback: none 9 | intended_uses: Document generation, document review, Q&A, customer response scenarios. 10 | license: unknown 11 | modality: text; text 12 | model_card: none 13 | monitoring: '' 14 | name: Konan LLM 15 | nationality: Japan 16 | organization: Konan 17 | prohibited_uses: '' 18 | quality_control: '' 19 | size: 13B parameters 20 | training_emissions: unknown 21 | training_hardware: unknown 22 | training_time: unknown 23 | type: model 24 | url: https://en.konantech.com/en/llm/konanllm 25 | -------------------------------------------------------------------------------- /assets/kotoba.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: unknown 3 | created_date: 2024-03-13 4 | dependencies: [] 5 | description: Kotoba-Speech is a Transformer-based speech generative model that supports 6 | fluent text-to-speech generation in Japanese and one-shot voice cloning through 7 | speech prompt. 8 | feedback: https://huggingface.co/kotoba-tech/kotoba-speech-v0.1/discussions 9 | intended_uses: '' 10 | license: Apache 2.0 11 | modality: text; audio 12 | model_card: https://huggingface.co/kotoba-tech/kotoba-speech-v0.1 13 | monitoring: unknown 14 | name: Kotoba Speech 15 | nationality: Japan 16 | organization: Kotoba Tech 17 | prohibited_uses: '' 18 | quality_control: '' 19 | size: 1.2B parameters (dense) 20 | training_emissions: unknown 21 | training_hardware: unknown 22 | training_time: unknown 23 | type: model 24 | url: https://huggingface.co/kotoba-tech/kotoba-speech-v0.1 25 | -------------------------------------------------------------------------------- /assets/ktai.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: unknown 3 | created_date: 2023-10-31 4 | dependencies: 5 | - AI-HUB dataset 6 | - National Institute of Korean Language dataset 7 | description: Midm is a pre-trained Korean-English language model developed by KT. 8 | It takes text as input and creates text. The model is based on Transformer architecture 9 | for an auto-regressive language model. 10 | feedback: https://huggingface.co/KT-AI/midm-bitext-S-7B-inst-v1/discussions 11 | intended_uses: It is expected to be used for various research purposes. 12 | license: CC-BY-NC 4.0 13 | modality: text; text 14 | model_card: https://huggingface.co/KT-AI/midm-bitext-S-7B-inst-v1 15 | monitoring: unknown 16 | name: Midm 17 | nationality: South Korea 18 | organization: KT Corporation 19 | prohibited_uses: It cannot be used for commercial purposes. 20 | quality_control: KT tried to remove unethical expressions such as profanity, slang, 21 | prejudice, and discrimination from training data. 22 | size: 7B parameters 23 | training_emissions: unknown 24 | training_hardware: unknown 25 | training_time: unknown 26 | type: model 27 | url: https://huggingface.co/KT-AI/midm-bitext-S-7B-inst-v1 28 | -------------------------------------------------------------------------------- /assets/kunlun.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: Evaluated on several popular benchmarks and performance in different fields. 
3 | created_date: 2023-10-30 4 | dependencies: 5 | - SkyPile 6 | description: The Skywork series is a family of large language models (LLMs) trained 7 | on a corpus of over 3.2 trillion tokens drawn from both English and Chinese texts. 8 | feedback: https://huggingface.co/Skywork/Skywork-13B-base/discussions 9 | intended_uses: '' 10 | license: 11 | explanation: can be found at https://github.com/SkyworkAI/Skywork/blob/main/LICENSE 12 | value: custom 13 | modality: text; text 14 | model_card: https://huggingface.co/Skywork/Skywork-13B-base 15 | monitoring: none 16 | name: Skywork 17 | nationality: China 18 | organization: Kunlun Inc. 19 | prohibited_uses: '' 20 | quality_control: '' 21 | size: 13B parameters (dense) 22 | training_emissions: unknown 23 | training_hardware: 512 A800-80GB GPUs 24 | training_time: 39 days 25 | type: model 26 | url: https://arxiv.org/pdf/2310.19341.pdf 27 | -------------------------------------------------------------------------------- /assets/laion_ev.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: Re-LAION-5B aims to fix the issues as reported by Stanford Internet Observatory 3 | for the original LAION-5B. It is available for download in two versions, research 4 | and research-safe. In total, 2236 links that potentially led to inappropriate 5 | content were removed. 6 | created_date: 2024-08-30 7 | dependencies: 8 | - LAION-5B 9 | description: Re-LAION-5B is an updated version of LAION-5B, the first web-scale, 10 | text-link to images pair dataset to be thoroughly cleaned of known links to suspected 11 | CSAM. It is an open dataset for fully reproducible research on language-vision 12 | learning. This model was developed in response to issues identified by the Stanford 13 | Internet Observatory in December 2023. The updates were made in collaboration 14 | with multiple organizations like the Internet Watch Foundation (IWF), the Canadian 15 | Center for Child Protection (C3P), and Stanford Internet Observatory. 16 | feedback: Problems with the dataset should be reported to the LAION organization. 17 | They have open lines for communication with their partners and the broader research 18 | community. 19 | intended_uses: Re-LAION-5B is designed for research on language-vision learning. 20 | It can also be used by third parties to clean existing derivatives of LAION-5B 21 | by generating diffs and removing all matched content from their versions. 22 | license: Apache 2.0 23 | modality: text; image 24 | model_card: unknown 25 | monitoring: unknown 26 | name: Re-LAION-5B 27 | nationality: Germany 28 | organization: LAION e.V. 29 | prohibited_uses: The dataset should not be utilized for purposes that breach legal 30 | parameters or ethical standards, such as dealing with illegal content. 31 | quality_control: The model utilized lists of link and image hashes provided by partner 32 | organizations. These were used to remove inappropriate links from the original 33 | LAION-5B dataset to create Re-LAION-5B. 34 | size: 5.5B (text, image) pairs 35 | training_emissions: Unknown 36 | training_hardware: Unknown 37 | training_time: Unknown 38 | type: model 39 | url: https://laion -------------------------------------------------------------------------------- /assets/latitude.yaml: -------------------------------------------------------------------------------- 1 | - access: 2 | explanation: 'The game is available to public upon sign up. 
3 | 4 | ' 5 | value: limited 6 | adaptation: '' 7 | created_date: 8 | explanation: 'Release date of the game on iOS and Android platforms [[Wikipedia]](https://en.wikipedia.org/wiki/AI_Dungeon). 9 | 10 | ' 11 | value: 2019-12-17 12 | dependencies: 13 | - OpenAI API 14 | description: 'AI Dungeon is a single-player text adventure game that uses AI to 15 | generate content. 16 | 17 | ' 18 | failures: '' 19 | feedback: '' 20 | intended_uses: '' 21 | license: 22 | explanation: '"Subject to your compliance with these Terms, the Company hereby 23 | grants to you, a personal, worldwide, royalty-free, non-assignable, non-sublicensable, 24 | non-transferrable, and non-exclusive license to use the software provided to 25 | you as part of the Services (and to download a single copy of the App onto the 26 | equipment or device specified by us)" - excerpt from the Terms of Service document. 27 | 28 | ' 29 | value: custom 30 | monitoring: '' 31 | monthly_active_users: '' 32 | name: AI Dungeon 33 | nationality: USA 34 | organization: Latitude 35 | output_space: '' 36 | prohibited_uses: '' 37 | quality_control: '' 38 | terms_of_service: https://play.aidungeon.io/main/termsOfService 39 | type: application 40 | url: https://play.aidungeon.io 41 | user_distribution: '' 42 | -------------------------------------------------------------------------------- /assets/lehigh.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: outperforms majority of preceding state-of-the-art models over 15 unique 3 | biomedical modalities. 4 | created_date: 2023-05-26 5 | dependencies: 6 | - GPT-style autoregressive decoder 7 | - BiomedGPT biomedical datasets 8 | description: BiomedGPT leverages self-supervision on large and diverse datasets 9 | to accept multi-modal inputs and perform a range of downstream tasks. 10 | feedback: '' 11 | intended_uses: furthering research in developing unified and generalist models for 12 | biomedicine. 13 | license: Apache 2.0 14 | modality: image, text; text 15 | model_card: '' 16 | monitoring: '' 17 | name: BiomedGPT 18 | nationality: USA 19 | organization: Lehigh University 20 | prohibited_uses: '' 21 | quality_control: No specific quality control is mentioned in model training, though 22 | details on data processing and how the model was trained are provided in the paper. 23 | size: 472M parameters (dense) 24 | training_emissions: unknown 25 | training_hardware: 10 NVIDIA A5000 GPUs 26 | training_time: unknown 27 | type: model 28 | url: https://arxiv.org/pdf/2305.17100.pdf 29 | -------------------------------------------------------------------------------- /assets/lg.yaml: -------------------------------------------------------------------------------- 1 | - access: closed 2 | analysis: none 3 | created_date: 2023-07-19 4 | dependencies: [] 5 | description: EXAONE 2.0 is a multimodal artificial intelligence that can be used 6 | to help develop new materials and medicines. 
7 | feedback: none 8 | intended_uses: '' 9 | license: unknown 10 | modality: image, text; image, text 11 | model_card: none 12 | monitoring: '' 13 | name: EXAONE 2.0 14 | nationality: South Korea 15 | organization: LG AI Research 16 | prohibited_uses: '' 17 | quality_control: '' 18 | size: unknown 19 | training_emissions: unknown 20 | training_hardware: unknown 21 | training_time: unknown 22 | type: model 23 | url: https://www.lgresearch.ai/exaone 24 | -------------------------------------------------------------------------------- /assets/lg_ai_research.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: The model was evaluated extensively across a wide range of public and 3 | in-house benchmarks. The comparative analysis showed that the performance of EXAONE 4 | 3.0 was competitive in English and excellent in Korean compared to other large 5 | language models of a similar size. 6 | created_date: 2024-09-08 7 | dependencies: 8 | - MeCab 9 | description: EXAONE 3.0 is an instruction-tuned large language model developed by 10 | LG AI Research. It demonstrates notably robust performance across a range of tasks 11 | and benchmarks. It has been fine-tuned to be capable of complex reasoning and 12 | has a particular proficiency in Korean. The released 7.8B parameter model is designed 13 | to promote open research and innovation. 14 | feedback: Unknown 15 | intended_uses: The model was intended for non-commercial and research purposes. 16 | The capabilities of the model allow for use cases that involve advanced AI and 17 | language processing tasks, particularly in fields requiring proficiency in English 18 | and Korean. 19 | license: Unknown 20 | modality: text; text 21 | model_card: unknown 22 | monitoring: Unknown 23 | name: EXAONE 3.0 Instruction Tuned Language Model 24 | nationality: South Korea 25 | organization: LG AI Research 26 | prohibited_uses: Commercial use is not intended for this model. Its intended use 27 | is for non-commercial research and innovation. 28 | quality_control: Extensive pre-training on a diverse dataset, and advanced post-training 29 | techniques were employed to enhance instruction-following capabilities. The model 30 | was also trained to fully comply with data handling standards. 31 | size: 7.8B parameters (dense) 32 | training_emissions: Unknown 33 | training_hardware: Unknown 34 | training_time: Unknown 35 | type: model 36 | url: https://arxiv.org/pdf/2408.03541 37 | -------------------------------------------------------------------------------- /assets/linkedin.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | adaptation: '' 3 | created_date: unknown 4 | dependencies: 5 | - Azure Cognitive Services for Vision 6 | description: More than 40 percent of LinkedIn's feed posts include at least one 7 | image. We want every member to have equal access to opportunity and are committed 8 | to ensuring that we make images accessible to our members who are blind or who 9 | have low vision so they can be a part of the online conversation. With Azure Cognitive 10 | Service for Vision, we can provide auto-captioning to edit and support alt. text 11 | descriptions. 
12 | failures: '' 13 | feedback: '' 14 | intended_uses: '' 15 | license: unknown 16 | monitoring: '' 17 | monthly_active_users: '' 18 | name: LinkedIn 19 | nationality: USA 20 | organization: LinkedIn 21 | output_space: '' 22 | prohibited_uses: '' 23 | quality_control: '' 24 | terms_of_service: '' 25 | type: application 26 | url: https://www.linkedin.com/ 27 | user_distribution: '' 28 | -------------------------------------------------------------------------------- /assets/llm360.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: Evaluated on several benchmark LLM tasks 3 | created_date: 2023-12-12 4 | dependencies: 5 | - Arxiv 6 | - Books 7 | - C4 8 | - RefinedWeb 9 | - StarCoder 10 | - StackExchange 11 | - Wikipedia 12 | description: Amber is the first model in the LLM360 family, an initiative for comprehensive 13 | and fully open-sourced LLMs, where all training details, model checkpoints, intermediate 14 | results, and additional analyses are made available to the community. 15 | feedback: https://huggingface.co/LLM360/Amber/discussions 16 | intended_uses: to support open and collaborative AI research by making the full 17 | LLM training process transparent. 18 | license: Apache 2.0 19 | modality: text; text 20 | model_card: https://huggingface.co/LLM360/Amber 21 | monitoring: unknown 22 | name: Amber 23 | nationality: International 24 | organization: LLM360 25 | prohibited_uses: '' 26 | quality_control: '' 27 | size: 7B parameters (dense) 28 | training_emissions: unknown 29 | training_hardware: 56 DGX A100 nodes, each equipped with 4 80GB A100 GPUs 30 | training_time: unknown 31 | type: model 32 | url: https://www.llm360.ai/ 33 | - access: open 34 | analysis: Evaluated on English and coding tasks and benchmarks, and outperforms 35 | LLaMA 2 in some. 36 | created_date: 2023-12-12 37 | dependencies: 38 | - SlimPajama 39 | - StarCoder 40 | description: CrystalCoder is a language model with a balance of code and text data 41 | that follows the initiative under LLM360 of its training process being fully transparent. 42 | feedback: https://huggingface.co/LLM360/CrystalCoder/discussions 43 | intended_uses: to support open and collaborative AI research by making the full 44 | LLM training process transparent. 45 | license: Apache 2.0 46 | modality: text; code, text 47 | model_card: https://huggingface.co/LLM360/CrystalCoder 48 | monitoring: unknown 49 | name: CrystalCoder 50 | nationality: International 51 | organization: LLM360 52 | prohibited_uses: '' 53 | quality_control: '' 54 | size: 7B parameters (dense) 55 | training_emissions: unknown 56 | training_hardware: Trained on the Cerebras Condor Galaxy 1 (CG-1), a 4 exaFLOPS, 57 | 54 million core, 64-node cloud AI supercomputer. 58 | training_time: unknown 59 | type: model 60 | url: https://www.llm360.ai/ 61 | - access: open 62 | analysis: Evaluated on the LLM360 Performance and Evaluation Collection that checks 63 | standard best practice benchmarks, medical, math, and coding knowledge. 64 | created_date: 2024-05-29 65 | dependencies: [] 66 | description: K2 is a 65 billion parameter large language model inspired by the Llama 67 | 2 65B model. The model is also supported with a suite of research tools, tutorials 68 | and step-by-step guides for learning pre-training and fine-tuning techniques. 
69 | feedback: https://huggingface.co/LLM360/K2/discussions 70 | intended_uses: The model is intended for learning pre-training techniques or enhancing 71 | research capabilities in large language models. 72 | license: Apache 2.0 73 | modality: text; text 74 | model_card: https://huggingface.co/LLM360/K2 75 | monitoring: unknown 76 | name: K2 77 | nationality: International 78 | organization: LLM360 79 | prohibited_uses: unknown 80 | quality_control: unknown 81 | size: 65B parameters 82 | training_emissions: unknown 83 | training_hardware: unknown 84 | training_time: unknown 85 | type: model 86 | url: https://www.llm360.ai/paper2.pdf 87 | -------------------------------------------------------------------------------- /assets/lmsys.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: Evaluated against similar LLMs using GPT-4 as a judge. 3 | created_date: 2023-03-30 4 | dependencies: 5 | - LLaMA 6 | - ShareGPT conversations data 7 | description: An open-source chatbot trained by fine-tuning LLaMA on user-shared 8 | conversations collected from ShareGPT. 9 | feedback: https://huggingface.co/datasets/bigcode/the-stack/discussions 10 | intended_uses: research on LLMs and chatbots 11 | license: Apache 2.0 12 | modality: text; text 13 | model_card: https://huggingface.co/lmsys/vicuna-13b-delta-v0 14 | monitoring: '' 15 | name: Vicuna 16 | nationality: USA 17 | organization: LMSYS 18 | prohibited_uses: '' 19 | quality_control: '' 20 | size: 13B parameters (dense) 21 | training_emissions: '' 22 | training_hardware: 8 A100 GPUs 23 | training_time: 1 day 24 | type: model 25 | url: https://lmsys.org/blog/2023-03-30-vicuna/ 26 | -------------------------------------------------------------------------------- /assets/mathai.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: Mistral model fine-tuned on AutoMathText and evaluated on the MATH dataset. 3 | created_date: 2024-02-12 4 | datasheet: https://huggingface.co/datasets/math-ai/AutoMathText 5 | dependencies: 6 | - OpenWebMath 7 | - RedPajama-Data 8 | - Algebraic Stack 9 | - Qwen 10 | description: AutoMathText is an extensive and carefully curated dataset encompassing 11 | around 200 GB of mathematical texts. 12 | excluded: '' 13 | feedback: https://huggingface.co/datasets/math-ai/AutoMathText/discussions 14 | included: '' 15 | intended_uses: '' 16 | license: CC BY-SA 4.0 17 | modality: text 18 | monitoring: unknown 19 | name: AutoMathText 20 | nationality: USA 21 | organization: Math AI 22 | prohibited_uses: '' 23 | quality_control: '' 24 | sample: [] 25 | size: 200 GB 26 | type: dataset 27 | url: https://github.com/yifanzhang-pro/AutoMathText 28 | -------------------------------------------------------------------------------- /assets/maya.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: Evaluated on the OpenLLM leaderboard, releasing at rank number 4 on the 3 | leaderboard. 4 | created_date: 2023-08-11 5 | dependencies: 6 | - LLaMA 2 7 | - Guanaco LLaMA dataset 8 | description: GodziLLa 2 is an experimental combination of various proprietary LoRAs 9 | from Maya Philippines and Guanaco LLaMA 2 1K dataset, with LLaMA 2. 
10 | feedback: none 11 | intended_uses: '' 12 | license: LLaMA 2 13 | modality: text; text 14 | model_card: https://huggingface.co/MayaPH/GodziLLa2-70B 15 | monitoring: unknown 16 | name: GodziLLa 2 17 | nationality: Philippines 18 | organization: Maya Philippines 19 | prohibited_uses: '' 20 | quality_control: '' 21 | size: 70B parameters (dense) 22 | training_emissions: unknown 23 | training_hardware: unknown 24 | training_time: unknown 25 | type: model 26 | url: https://huggingface.co/MayaPH/GodziLLa2-70B 27 | - access: 28 | explanation: The first 200 million tokens are free. 29 | value: limited 30 | analysis: We evaluated voyage-code-3 using an enhanced suite of evaluation datasets 31 | designed to address the shortcomings of existing benchmarks and deliver practical, 32 | robust results. 33 | created_date: 2024-12-04 34 | dependencies: [] 35 | description: Introducing voyage-code-3, our next-generation embedding model optimized 36 | for code retrieval. 37 | feedback: "If you\u2019re also interested in fine-tuned embedding models, we\u2019\ 38 | d love to hear from you\u2014please email us at contact@voyageai.com." 39 | intended_uses: optimized for code retrieval 40 | license: unknown 41 | modality: unknown 42 | model_card: unknown 43 | monitoring: unknown 44 | name: voyage-code-3 45 | nationality: USA 46 | organization: Voyage AI 47 | prohibited_uses: unknown 48 | quality_control: unknown 49 | size: unknown 50 | training_emissions: unknown 51 | training_hardware: unknown 52 | training_time: unknown 53 | type: model 54 | url: https://blog.voyageai.com/2024/12/04/voyage-code-3/ 55 | -------------------------------------------------------------------------------- /assets/moonhub.yaml: -------------------------------------------------------------------------------- 1 | - access: limited 2 | adaptation: '' 3 | created_date: 2023-10-11 4 | dependencies: 5 | - Cohere Base 6 | description: Moonhub Recruiter is the world's first AI-powered recruiter providing 7 | sourcing and recruiting services for startups and growing businesses. 8 | failures: '' 9 | feedback: '' 10 | intended_uses: recruiting candidates for business needs 11 | license: 12 | explanation: License information can be found in terms of service 13 | value: custom 14 | monitoring: '' 15 | monthly_active_users: unknown 16 | name: Moonhub Recruiter 17 | nationality: USA 18 | organization: Moonhub 19 | output_space: job candidate matches 20 | prohibited_uses: none 21 | quality_control: '' 22 | terms_of_service: https://www.moonhub.ai/terms 23 | type: application 24 | url: https://www.moonhub.ai/ 25 | user_distribution: unknown 26 | -------------------------------------------------------------------------------- /assets/moreh.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: unknown 3 | created_date: 2024-01-16 4 | dependencies: 5 | - Qwen 6 | - OpenOrca 7 | description: MoMo is a large language model fine-tuned from Qwen. 
8 | feedback: https://huggingface.co/moreh/MoMo-72B-lora-1.8.7-DPO/discussions 9 | intended_uses: '' 10 | license: MIT 11 | modality: text; text 12 | model_card: https://huggingface.co/moreh/MoMo-72B-lora-1.8.7-DPO 13 | monitoring: unknown 14 | name: MoMo 15 | nationality: USA 16 | organization: Moreh 17 | prohibited_uses: '' 18 | quality_control: unknown 19 | size: 72B parameters (dense) 20 | training_emissions: unknown 21 | training_hardware: "AMD\u2019s MI250 GPU" 22 | training_time: unknown 23 | type: model 24 | url: https://huggingface.co/moreh/MoMo-72B-lora-1.8.7-DPO 25 | -------------------------------------------------------------------------------- /assets/mosaic.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: Evaluated on a range of benchmarks and performed on par with LLaMA-7B. 3 | created_date: 2023-05-05 4 | dependencies: 5 | - RedPajama-Data 6 | - C4 7 | - The Stack 8 | - Multimodal C4 9 | description: MPT is a series of large language models seeking to address the limitations 10 | of other open source models like LLaMA and Pythia. 11 | feedback: '' 12 | intended_uses: '' 13 | license: Apache 2.0 14 | modality: text; text 15 | model_card: '' 16 | monitoring: '' 17 | name: MPT 18 | nationality: USA 19 | organization: Mosaic 20 | prohibited_uses: '' 21 | quality_control: '' 22 | size: 7B parameters (dense) 23 | training_emissions: unknown 24 | training_hardware: 440 A100 40GB GPUs 25 | training_time: 9.5 days 26 | type: model 27 | url: https://www.mosaicml.com/blog/mpt-7b 28 | - access: open 29 | analysis: Compared to Stable Diffusion 2, a SOTA text-to-image model. 30 | created_date: 2023-10-25 31 | dependencies: 32 | - CommonCatalog 33 | description: CommonCanvas is a text-to-image model trained solely on Creative Commons 34 | licensed images. 35 | feedback: none 36 | intended_uses: '' 37 | license: Apache 2.0 38 | modality: text; image 39 | model_card: none 40 | monitoring: none 41 | name: CommonCanvas 42 | nationality: unknown 43 | organization: Cornell University, Mosaic 44 | prohibited_uses: '' 45 | quality_control: '' 46 | size: unknown 47 | training_emissions: unknown 48 | training_hardware: 128 A100 NVIDIA GPUs 49 | training_time: 6.79 days 50 | type: model 51 | url: https://arxiv.org/pdf/2310.16825.pdf 52 | - access: open 53 | analysis: none 54 | created_date: 2023-10-25 55 | datasheet: https://github.com/mosaicml/diffusion/blob/main/assets/common-canvas.md 56 | dependencies: 57 | - YFCC100M 58 | - BLIP-2 59 | description: CommonCatalog is a curated dataset of CommonCrawl images and synthetic 60 | captions. 61 | excluded: images with non-derivative licenses 62 | feedback: none 63 | included: images with derivative licenses 64 | intended_uses: '' 65 | license: Apache 2.0 66 | modality: image-caption pairings 67 | monitoring: '' 68 | name: CommonCatalog 69 | nationality: USA 70 | organization: Mosaic 71 | prohibited_uses: '' 72 | quality_control: '' 73 | sample: [] 74 | size: 70M images 75 | type: dataset 76 | url: https://arxiv.org/pdf/2310.16825.pdf 77 | - access: open 78 | analysis: unknown 79 | created_date: 2024-10-08 80 | dependencies: [] 81 | description: XTTS-v2 is a voice generation model that allows voice cloning into 82 | different languages using a brief 6-second audio clip, supporting 17 languages 83 | with features like emotion and style transfer, cross-language voice cloning, and 84 | multi-lingual speech generation. 
It powers Coqui Studio and Coqui API, with improvements 85 | in architectural and prosody aspects for better audio quality. 86 | feedback: Users can join the Coqui community on Discord, engage on Twitter, or send 87 | emails to info@coqui.ai for feedback and queries. 88 | intended_uses: Voice cloning, multi-lingual speech generation, emotion and style 89 | transfer in speech. 90 | license: Coqui Public Model 91 | modality: audio; audio 92 | model_card: https://huggingface.co/coqui/XTTS-v2 93 | monitoring: unknown 94 | name: XTTS-v2 95 | nationality: USA 96 | organization: Coqui 97 | prohibited_uses: unknown 98 | quality_control: unknown 99 | size: unknown 100 | training_emissions: unknown 101 | training_hardware: unknown 102 | training_time: unknown 103 | type: model 104 | url: https://huggingface.co/coqui/XTTS-v2 105 | -------------------------------------------------------------------------------- /assets/nanyang.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: Evaluated on researcher experiments to test deeper understanding and advanced 3 | commonsense reasoning 4 | created_date: 2023-05-05 5 | dependencies: 6 | - MIMIC-IT 7 | - OpenFlamingo 8 | description: "Otter is a multi-modal model based on OpenFlamingo (open-sourced version\ 9 | \ of DeepMind\u2019s Flamingo), trained on MIMIC-IT and showcasing improved instruction-following\ 10 | \ ability and in-context learning." 11 | feedback: '' 12 | intended_uses: Following and executing new instructions with few in-context learning 13 | examples given image and textual input. 14 | license: MIT 15 | modality: image, text; text 16 | model_card: https://github.com/Luodian/Otter/blob/main/docs/model_card.md 17 | monitoring: '' 18 | name: Otter 19 | nationality: Singapore 20 | organization: Nanyang Technological University 21 | prohibited_uses: '' 22 | quality_control: '' 23 | size: 1.3B parameters (dense) 24 | training_emissions: '' 25 | training_hardware: 4 RTX-3090 GPUs 26 | training_time: '' 27 | type: model 28 | url: https://arxiv.org/pdf/2305.03726v1.pdf 29 | -------------------------------------------------------------------------------- /assets/naver.yaml: -------------------------------------------------------------------------------- 1 | - access: 2 | explanation: The model is not available for download or direct API use 3 | value: closed 4 | analysis: '' 5 | created_date: 6 | explanation: The date the model paper was submitted to arxiv 7 | value: 2021-05-21 8 | dependencies: [] 9 | description: HyperClova is an autoregressive language model 10 | feedback: '' 11 | intended_uses: '' 12 | license: unknown 13 | modality: text; text 14 | model_card: none 15 | monitoring: '' 16 | name: HyperCLOVA 17 | nationality: South Korea 18 | organization: NAVER 19 | prohibited_uses: '' 20 | quality_control: '' 21 | size: 82B parameters 22 | training_emissions: '' 23 | training_hardware: 1024 A100 GPUs 24 | training_time: 130.4 days 25 | type: model 26 | url: https://arxiv.org/abs/2109.04650 27 | - access: limited 28 | analysis: Evaluated on English and Korean benchmarks in comparison to open source 29 | English and multilingual LLMs, with HyperCLOVA X (closed) surpassing the models 30 | compared. 31 | created_date: 2024-04-13 32 | dependencies: [] 33 | description: HyperCLOVA X is a family of large language models (LLMs) tailored to 34 | the Korean language and culture, along with competitive capabilities in English, 35 | math, and coding. 
36 | feedback: none 37 | intended_uses: '' 38 | license: unknown 39 | modality: text; text 40 | model_card: none 41 | monitoring: '' 42 | name: HyperCLOVA X 43 | nationality: South Korea 44 | organization: NAVER 45 | prohibited_uses: '' 46 | quality_control: '' 47 | size: unknown 48 | training_emissions: unknown 49 | training_hardware: unknown 50 | training_time: unknown 51 | type: model 52 | url: https://arxiv.org/pdf/2404.01954 53 | -------------------------------------------------------------------------------- /assets/ncsoft.yaml: -------------------------------------------------------------------------------- 1 | - access: closed 2 | analysis: Boasts the highest performance among the Korean LLMs of similar sizes 3 | that have been released to date, according to internal evaluations. 4 | created_date: 2023-08-16 5 | dependencies: [] 6 | description: "VARCO-LLM is NCSOFT\u2019s large language model and is trained on\ 7 | \ English and Korean." 8 | feedback: none 9 | intended_uses: Developing various NLP-based AI services such as Q&A, chatbot, summarization, 10 | information extraction 11 | license: 12 | explanation: Can be found at https://github.com/ncsoft/ncresearch/blob/main/LICENSE.txt 13 | value: custom 14 | modality: text; text 15 | model_card: none 16 | monitoring: '' 17 | name: VARCO-LLM 18 | nationality: South Korea 19 | organization: NCSOFT 20 | prohibited_uses: '' 21 | quality_control: '' 22 | size: 13B parameters 23 | training_emissions: unknown 24 | training_hardware: unknown 25 | training_time: unknown 26 | type: model 27 | url: https://github.com/ncsoft/ncresearch 28 | -------------------------------------------------------------------------------- /assets/neeva.yaml: -------------------------------------------------------------------------------- 1 | - access: closed 2 | analysis: '' 3 | created_date: unknown 4 | datasheet: '' 5 | dependencies: [] 6 | description: '' 7 | excluded: '' 8 | feedback: '' 9 | included: '' 10 | intended_uses: '' 11 | license: unknown 12 | modality: text 13 | monitoring: '' 14 | name: Neeva dataset 15 | nationality: USA 16 | organization: Neeva 17 | prohibited_uses: '' 18 | quality_control: '' 19 | sample: [] 20 | size: unknown 21 | type: dataset 22 | url: https://neeva.com/index 23 | - access: closed 24 | analysis: '' 25 | created_date: unknown 26 | dependencies: 27 | - Neeva dataset 28 | description: '' 29 | feedback: '' 30 | intended_uses: '' 31 | license: unknown 32 | modality: text; text 33 | model_card: '' 34 | monitoring: '' 35 | name: Neeva model 36 | nationality: USA 37 | organization: Neeva 38 | prohibited_uses: '' 39 | quality_control: '' 40 | size: unknown 41 | training_emissions: '' 42 | training_hardware: '' 43 | training_time: '' 44 | type: model 45 | url: https://neeva.com/index 46 | - access: open 47 | adaptation: '' 48 | created_date: 49 | explanation: The [[Blog post]](https://neeva.com/blog/introducing-neevaai) announcing 50 | NeevaAI. 51 | value: 2023-01-06 52 | dependencies: 53 | - Neeva model 54 | description: NeevaAI is an AI-powered search tool that combines the capabilities 55 | of LLMs with Neeva's independent in-house search stack to create a unique and 56 | transformative search experience. 
57 | failures: '' 58 | feedback: '' 59 | intended_uses: '' 60 | license: 61 | explanation: '"If you comply with these Terms, Neeva grants to you, during the 62 | term of these Terms, a limited non-exclusive, non-transferable license, with 63 | no right to sublicense, to download and install the App on your personal computers, 64 | mobile handsets, tablets, wearable devices, and/or other devices and to run 65 | the App solely for your own personal non-commercial purposes." - excerpt from 66 | the Terms of Service document. 67 | 68 | ' 69 | value: Custom 70 | monitoring: '' 71 | monthly_active_users: '' 72 | name: NeevaAI 73 | nationality: USA 74 | organization: Neeva 75 | output_space: '' 76 | prohibited_uses: '' 77 | quality_control: '' 78 | terms_of_service: https://neeva.com/terms 79 | type: application 80 | url: https://neeva.com/blog/introducing-neevaai 81 | user_distribution: '' 82 | -------------------------------------------------------------------------------- /assets/nextdoor.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | adaptation: '' 3 | created_date: 2023-05-02 4 | dependencies: 5 | - ChatGPT 6 | description: AI chatbot on Nextdoor that helps users write more clear and conscientious 7 | posts. 8 | failures: '' 9 | feedback: '' 10 | intended_uses: to be used to help make the Nextdoor experience more positive for 11 | users 12 | license: unknown 13 | monitoring: '' 14 | monthly_active_users: '' 15 | name: Nextdoor Assistant 16 | nationality: USA 17 | organization: Nextdoor 18 | output_space: natural language text guidance 19 | prohibited_uses: '' 20 | quality_control: '' 21 | terms_of_service: '' 22 | type: application 23 | url: https://help.nextdoor.com/s/article/Introducing-Assistant 24 | user_distribution: '' 25 | -------------------------------------------------------------------------------- /assets/nolano.yaml: -------------------------------------------------------------------------------- 1 | - access: limited 2 | adaptation: '' 3 | created_date: 2023-03-19 4 | dependencies: [] 5 | description: Cformers is a set of transformers that act as an API for AI inference 6 | in code. 7 | failures: '' 8 | feedback: '' 9 | intended_uses: '' 10 | license: MIT 11 | monitoring: '' 12 | monthly_active_users: '' 13 | name: Cformers 14 | nationality: USA 15 | organization: Nolano 16 | output_space: '' 17 | prohibited_uses: '' 18 | quality_control: '' 19 | terms_of_service: '' 20 | type: application 21 | url: https://www.nolano.org/services/Cformers/ 22 | user_distribution: '' 23 | - access: 24 | explanation: "T\xFClu3 is intended for research and educational use." 25 | value: limited 26 | analysis: The model can produce problematic outputs (especially when prompted to 27 | do so). 28 | created_date: 2024-11-21 29 | dependencies: 30 | - Llama 3.1 31 | description: "T\xFClu3 is a leading instruction following model family, offering\ 32 | \ fully open-source data, code, and recipes designed to serve as a comprehensive\ 33 | \ guide for modern post-training techniques." 34 | feedback: unknown 35 | intended_uses: "T\xFClu3 is intended for research and educational use." 36 | license: 37 | explanation: "All Llama 3.1 T\xFClu3 models are released under Meta's Llama 3.1\ 38 | \ Community License Agreement." 
39 | value: Llama 3.1 Community License Agreement 40 | modality: 41 | explanation: 'Language(s) (NLP): Primarily English' 42 | value: text; text 43 | model_card: https://huggingface.co/allenai/Llama-3.1-Tulu-3-8B 44 | monitoring: unknown 45 | name: Llama 3.1 Tulu 3 46 | nationality: USA 47 | organization: Allen Institute for AI 48 | prohibited_uses: The model can produce problematic outputs (especially when prompted 49 | to do so). 50 | quality_control: "The T\xFClu3 models have limited safety training, but are not\ 51 | \ deployed automatically with in-the-loop filtering of responses like ChatGPT." 52 | size: 53 | explanation: Final Models (RLVR) allenai/Llama-3.1-Tulu-3-8B allenai/Llama-3.1-Tulu-3-70B 54 | value: 70B parameters 55 | training_emissions: unknown 56 | training_hardware: unknown 57 | training_time: unknown 58 | type: model 59 | url: https://huggingface.co/allenai/Llama-3.1-Tulu-3-8B 60 | -------------------------------------------------------------------------------- /assets/notion.yaml: -------------------------------------------------------------------------------- 1 | - access: limited 2 | adaptation: '' 3 | created_date: 2023-02-22 4 | dependencies: 5 | - Anthropic API 6 | description: "Notion AI is a connected assistant that helps you think bigger, work\ 7 | \ faster, and augments your creativity, right inside the functional workspace\ 8 | \ you\u2019re already familiar with." 9 | failures: '' 10 | feedback: '' 11 | intended_uses: '' 12 | license: '' 13 | monitoring: '' 14 | monthly_active_users: '' 15 | name: Notion AI 16 | nationality: USA 17 | organization: Notion 18 | output_space: '' 19 | prohibited_uses: '' 20 | quality_control: '' 21 | terms_of_service: '' 22 | type: application 23 | url: https://www.notion.so/help/guides/notion-ai-for-docs 24 | user_distribution: '' 25 | -------------------------------------------------------------------------------- /assets/nucleus.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: Evaluated on the OpenLLM leaderboard, performing on par with similar-sized 3 | models. 4 | created_date: 2023-10-05 5 | dependencies: 6 | - RefinedWeb 7 | description: Nucleus is a 22B parameters causal decoder-only model built by Nucleus.AI 8 | and trained on 500B tokens of RefinedWeb along with curated corpora. 9 | feedback: https://huggingface.co/NucleusAI/nucleus-22B-token-500B/discussions 10 | intended_uses: Research on large language models; as a foundation for further specialization 11 | and finetuning for specific usecases (e.g., summarization, text generation, chatbot, 12 | etc.) 13 | license: MIT 14 | modality: text; text 15 | model_card: https://huggingface.co/NucleusAI/nucleus-22B-token-500B 16 | monitoring: unknown 17 | name: Nucleus 18 | nationality: USA 19 | organization: Nucleus.AI 20 | prohibited_uses: Production use without adequate assessment of risks and mitigation; 21 | any use cases which may be considered irresponsible or harmful. 
22 | quality_control: '' 23 | size: 22B parameters (dense) 24 | training_emissions: unknown 25 | training_hardware: unknown 26 | training_time: 2 weeks 27 | type: model 28 | url: https://www.withnucleus.ai/ 29 | -------------------------------------------------------------------------------- /assets/nvidia.yaml: -------------------------------------------------------------------------------- 1 | - access: 2 | explanation: 'Neither the 8.3B parameter model trained to convergence nor the 3 | 1 trillion parameter model is available for download 4 | 5 | ' 6 | value: closed 7 | analysis: '' 8 | created_date: 9 | explanation: The date the paper for the 1 trillion parameter model was published 10 | value: 2021-04-09 11 | dependencies: [] 12 | description: Megatron-LM is an autoregressive language model 13 | feedback: none 14 | intended_uses: none 15 | license: unknown 16 | modality: text; text 17 | model_card: none 18 | monitoring: none 19 | name: Megatron-LM 20 | nationality: USA 21 | organization: NVIDIA 22 | prohibited_uses: none 23 | quality_control: unknown 24 | size: 1T parameters (dense) 25 | training_emissions: unknown 26 | training_hardware: 3072 A100 GPUs 27 | training_time: 84 days 28 | type: model 29 | url: https://arxiv.org/abs/2104.04473 30 | - access: open 31 | analysis: '' 32 | created_date: 2022-06-17 33 | datasheet: '' 34 | dependencies: 35 | - YouTube 36 | - Wikipedia 37 | - Reddit 38 | description: '' 39 | excluded: '' 40 | feedback: '' 41 | included: '' 42 | intended_uses: '' 43 | license: MIT 44 | modality: text, video 45 | monitoring: '' 46 | name: MineDojo 47 | nationality: USA 48 | organization: NVIDIA 49 | prohibited_uses: '' 50 | quality_control: '' 51 | sample: [] 52 | size: 730k videos, 6k Wikipedia pages, 340k Reddit posts 53 | type: dataset 54 | url: https://arxiv.org/abs/2206.08853 55 | - access: open 56 | analysis: '' 57 | created_date: 2022-10-06 58 | datasheet: '' 59 | dependencies: 60 | - T5 61 | - Mask R-CNN 62 | - VIMA dataset 63 | description: '' 64 | excluded: '' 65 | feedback: '' 66 | included: '' 67 | intended_uses: '' 68 | license: MIT 69 | modality: image, text 70 | monitoring: '' 71 | name: VIMA dataset 72 | nationality: unknown 73 | organization: NVIDIA, Stanford 74 | prohibited_uses: '' 75 | quality_control: '' 76 | sample: [] 77 | size: 200M parameters (dense model) 78 | type: dataset 79 | url: https://vimalabs.github.io/ 80 | - access: open 81 | analysis: '' 82 | created_date: 2022-10-06 83 | dependencies: [] 84 | description: '' 85 | feedback: '' 86 | intended_uses: '' 87 | license: MIT 88 | modality: image, text; robotics trajectories 89 | model_card: '' 90 | monitoring: '' 91 | name: VIMA 92 | nationality: unknown 93 | organization: NVIDIA, Stanford 94 | prohibited_uses: '' 95 | quality_control: '' 96 | size: 200M parameters (dense) 97 | training_emissions: '' 98 | training_hardware: '' 99 | training_time: '' 100 | type: model 101 | url: https://vimalabs.github.io/ 102 | - access: open 103 | analysis: Evaluated on standard LLM benchmarks across a range of fields like reasoning, 104 | code generation, and mathematical skills. 105 | created_date: 2024-02-27 106 | dependencies: [] 107 | description: Nemotron 4 is a 15-billion-parameter large multilingual language model 108 | trained on 8 trillion text tokens.
109 | feedback: none 110 | intended_uses: '' 111 | license: unknown 112 | modality: text; code, text 113 | model_card: none 114 | monitoring: unknown 115 | name: Nemotron 4 116 | nationality: USA 117 | organization: Nvidia 118 | prohibited_uses: '' 119 | quality_control: Deduplication and quality filtering techniques are applied to the 120 | training dataset. 121 | size: 15B parameters (dense) 122 | training_emissions: unknown 123 | training_hardware: 3072 H100 80GB SXM5 GPUs across 384 DGX H100 nodes 124 | training_time: 13 days 125 | type: model 126 | url: https://arxiv.org/pdf/2402.16819.pdf 127 | -------------------------------------------------------------------------------- /assets/oasst.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: none 3 | created_date: 2023-08-23 4 | dependencies: 5 | - LLaMA 2 6 | description: OpenAssistant LLaMA 2 is an Open-Assistant fine-tuning of Meta's LLaMA 7 | 2. 8 | feedback: https://huggingface.co/OpenAssistant/llama2-70b-oasst-sft-v10/discussions 9 | intended_uses: '' 10 | license: LLaMA 2 11 | modality: text; text 12 | model_card: https://huggingface.co/OpenAssistant/llama2-70b-oasst-sft-v10 13 | monitoring: unknown 14 | name: OpenAssistant LLaMA 2 15 | nationality: International 16 | organization: OpenAssistant 17 | prohibited_uses: '' 18 | quality_control: '' 19 | size: 70B parameters (dense) 20 | training_emissions: unknown 21 | training_hardware: unknown 22 | training_time: unknown 23 | type: model 24 | url: https://huggingface.co/OpenAssistant/llama2-70b-oasst-sft-v10 25 | -------------------------------------------------------------------------------- /assets/ollama.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: Mainly evaluated on MT-Bench and AlpacaEval, which are GPT-4-based comparisons. 3 | created_date: 2023-11-02 4 | dependencies: [] 5 | description: Starling is a large language model trained by reinforcement learning 6 | from AI feedback focused on improving chatbot helpfulness. 7 | feedback: https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha/discussions 8 | intended_uses: Academic research and free commercial usage 9 | license: CC BY NC 4.0 10 | modality: text; text 11 | model_card: https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha 12 | monitoring: none 13 | name: Starling 14 | nationality: USA 15 | organization: Ollama 16 | prohibited_uses: '' 17 | quality_control: '' 18 | size: 7B parameters (dense) 19 | training_emissions: unknown 20 | training_hardware: unknown 21 | training_time: unknown 22 | type: model 23 | url: https://starling.cs.berkeley.edu/ 24 | -------------------------------------------------------------------------------- /assets/openlemur.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: Evaluated on text and code benchmarks in comparison to other models. 3 | created_date: 2023-10-10 4 | dependencies: 5 | - LLaMA 2 6 | - The Stack 7 | - RefinedWeb 8 | - RedPajama 9 | - Common Crawl 10 | - Wikipedia 11 | - ArXiv 12 | description: Lemur is an openly accessible language model optimized for both natural 13 | language and coding capabilities to serve as the backbone of versatile language 14 | agents. 
15 | feedback: https://huggingface.co/OpenLemur/lemur-70b-v1/discussions 16 | intended_uses: '' 17 | license: LLaMA2 18 | modality: code, text; code, text 19 | model_card: https://huggingface.co/OpenLemur/lemur-70b-v1 20 | monitoring: '' 21 | name: Lemur 22 | nationality: USA 23 | organization: OpenLemur 24 | prohibited_uses: '' 25 | quality_control: '' 26 | size: 70B parameters (dense) 27 | training_emissions: unknown 28 | training_hardware: TPUv4-512 pod 29 | training_time: unknown 30 | type: model 31 | url: https://arxiv.org/pdf/2310.06830.pdf 32 | - access: open 33 | analysis: Evaluated on text and code benchmarks in comparison to other models. 34 | created_date: 2023-10-10 35 | dependencies: 36 | - Lemur 37 | - OpenAssistant 1 38 | - OpenOrca 39 | - ShareGPT & ChatLogs 40 | - Evol-CodeAlpaca data 41 | description: Lemur-Chat is an openly accessible language model optimized for both 42 | natural language and coding capabilities to serve as the backbone of versatile 43 | language agents. 44 | feedback: https://huggingface.co/OpenLemur/lemur-70b-chat-v1/discussions 45 | intended_uses: '' 46 | license: CC-BY-NC-4.0 47 | modality: text; text 48 | model_card: https://huggingface.co/OpenLemur/lemur-70b-chat-v1 49 | monitoring: '' 50 | name: Lemur-Chat 51 | nationality: USA 52 | organization: OpenLemur 53 | prohibited_uses: '' 54 | quality_control: '' 55 | size: 70B parameters (dense) 56 | training_emissions: unknown 57 | training_hardware: unknown 58 | training_time: unknown 59 | type: model 60 | url: https://arxiv.org/pdf/2310.06830.pdf 61 | -------------------------------------------------------------------------------- /assets/orion.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: Evaluated on multilingual and NLP benchmarks in comparison with SoTA models 3 | of comparable size. 4 | created_date: 2024-01-20 5 | dependencies: [] 6 | description: Orion series models are open-source multilingual large language models 7 | trained from scratch by OrionStarAI. 8 | feedback: https://huggingface.co/OrionStarAI/Orion-14B-Base/discussions 9 | intended_uses: '' 10 | license: 11 | explanation: Model license can be found at https://github.com/OrionStarAI/Orion/blob/master/ModelsCommunityLicenseAgreement. 12 | Code license is under Apache 2.0 13 | value: custom 14 | modality: text; text 15 | model_card: https://huggingface.co/OrionStarAI/Orion-14B-Base 16 | monitoring: unknown 17 | name: Orion 18 | nationality: China 19 | organization: OrionStarAI 20 | prohibited_uses: '' 21 | quality_control: unknown 22 | size: 14B parameters (dense) 23 | training_emissions: unknown 24 | training_hardware: unknown 25 | training_time: unknown 26 | type: model 27 | url: https://github.com/OrionStarAI/Orion 28 | -------------------------------------------------------------------------------- /assets/osu.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: Evaluated on MATH, a competition-level dataset, and achieves a 46% accuracy, 3 | higher than accuracy produced by GPT-4's chain of thought. 4 | created_date: 2023-09-11 5 | dependencies: 6 | - MathInstruct 7 | - LLaMA 8 | - Code LLaMA 9 | description: MAmmoTH is a series of open-source large language models (LLMs) specifically 10 | tailored for general math problem-solving. 
11 | feedback: '' 12 | intended_uses: '' 13 | license: MIT 14 | modality: text; text 15 | model_card: '' 16 | monitoring: '' 17 | name: MAmmoTH 18 | nationality: USA 19 | organization: Ohio State University 20 | prohibited_uses: '' 21 | quality_control: '' 22 | size: 34B parameters (dense) 23 | training_emissions: '' 24 | training_hardware: '' 25 | training_time: '' 26 | type: model 27 | url: https://arxiv.org/pdf/2309.05653.pdf 28 | -------------------------------------------------------------------------------- /assets/othersideai.yaml: -------------------------------------------------------------------------------- 1 | - access: limited 2 | adaptation: unknown 3 | created_date: unknown 4 | dependencies: 5 | - OpenAI API 6 | description: 'HyperWrite is a writing assistant that generates text based on a user''s 7 | request, as well as style and tone choices. 8 | 9 | ' 10 | failures: unknown 11 | feedback: unknown 12 | intended_uses: 'HyperWrite is intended to be used as a writing assistant. 13 | 14 | ' 15 | license: 16 | explanation: '"The Software, including software embedded in the Software, is licensed, 17 | not sold, to you by Otherside only under the terms of this Agreement; and Otherside 18 | reserves all rights not expressly granted under this Agreemen" - excerpt from 19 | the Terms of Service. 20 | 21 | ' 22 | value: custom 23 | monitoring: unknown 24 | monthly_active_users: unknown 25 | name: HyperWrite 26 | nationality: USA 27 | organization: OthersideAI 28 | output_space: Generation 29 | prohibited_uses: unknown 30 | quality_control: 31 | explanation: 'There is a HyperWrite blog warning the users about the risks of 32 | writing with an AI assistant, but the company doesn''t list the quality control 33 | measures taken to prevent the listed risks, if any [[HyperWrite Blog]] (https://blog.hyperwrite.ai/what-are-the-risks-of-ai-writing/). 34 | 35 | ' 36 | value: unknown 37 | terms_of_service: https://hyperwriteai.com/terms 38 | type: application 39 | url: https://hyperwriteai.com/ 40 | user_distribution: unknown 41 | -------------------------------------------------------------------------------- /assets/paladin.yaml: -------------------------------------------------------------------------------- 1 | - access: limited 2 | adaptation: '' 3 | created_date: 2023-02-01 4 | dependencies: [] 5 | description: UnderwriteGPT is the world's first generative AI underwriting tool. 6 | failures: '' 7 | feedback: '' 8 | intended_uses: '' 9 | license: '' 10 | monitoring: '' 11 | monthly_active_users: '' 12 | name: UnderwriteGPT 13 | nationality: USA 14 | organization: Paladin Group and Dais Technology 15 | output_space: '' 16 | prohibited_uses: '' 17 | quality_control: '' 18 | terms_of_service: '' 19 | type: application 20 | url: https://dais.com/underwritegpt/ 21 | user_distribution: '' 22 | -------------------------------------------------------------------------------- /assets/peking.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: Compared with other multi-task, instruction-following agents. 3 | created_date: 2023-11-10 4 | dependencies: [] 5 | description: JARVIS-1 is an open-world agent that can perceive multimodal input 6 | (visual observations and human instructions), generate sophisticated plans, and 7 | perform embodied control, all within the popular yet challenging open-world Minecraft 8 | universe. 
9 | feedback: none 10 | intended_uses: '' 11 | license: unknown 12 | modality: text; in-game actions 13 | model_card: none 14 | monitoring: none 15 | name: JARVIS-1 16 | nationality: China 17 | organization: Peking University Institute for Artificial Intelligence 18 | prohibited_uses: '' 19 | quality_control: '' 20 | size: unknown 21 | training_emissions: unknown 22 | training_hardware: unknown 23 | training_time: unknown 24 | type: model 25 | url: https://arxiv.org/pdf/2311.05997.pdf 26 | -------------------------------------------------------------------------------- /assets/perplexity.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | adaptation: '' 3 | created_date: 4 | explanation: Perplexity Ask was announced in a [[Twitter post]](https://twitter.com/perplexity_ai/status/1600551871554338816?s=20) 5 | by the company. 6 | value: 2022-12-07 7 | dependencies: 8 | - GPT-3.5 9 | - Bing Search 10 | description: Perplexity Ask is a new search interface that uses advanced artificial 11 | intelligence technologies 12 | failures: '' 13 | feedback: '' 14 | intended_uses: '' 15 | license: none 16 | monitoring: '' 17 | monthly_active_users: '' 18 | name: Perplexity Ask 19 | nationality: USA 20 | organization: Perplexity 21 | output_space: '' 22 | prohibited_uses: '' 23 | quality_control: '' 24 | terms_of_service: '' 25 | type: application 26 | url: https://www.perplexity.ai/ 27 | user_distribution: '' 28 | - access: closed 29 | adaptation: '' 30 | created_date: 2022-12-15 31 | dependencies: 32 | - Perplexity Ask 33 | - OpenAI API 34 | description: Twitter search interface that is powered by Perplexity's structured 35 | search engine. 36 | failures: '' 37 | feedback: '' 38 | intended_uses: '' 39 | license: none 40 | monitoring: '' 41 | monthly_active_users: '' 42 | name: Bird SQL 43 | nationality: USA 44 | organization: Perplexity 45 | output_space: '' 46 | prohibited_uses: '' 47 | quality_control: '' 48 | terms_of_service: '' 49 | type: application 50 | url: https://www.perplexity.ai/sql 51 | user_distribution: '' 52 | - access: open 53 | adaptation: '' 54 | created_date: 2023-10-27 55 | dependencies: [] 56 | description: Perplexity chat is an AI chatbot trained in-house by Perplexity. 57 | failures: '' 58 | feedback: '' 59 | intended_uses: '' 60 | license: none 61 | monitoring: '' 62 | monthly_active_users: '' 63 | name: Perplexity Chat 64 | nationality: USA 65 | organization: Perplexity 66 | output_space: Chatbot output in response to user queries 67 | prohibited_uses: '' 68 | quality_control: '' 69 | terms_of_service: https://blog.perplexity.ai/legal/terms-of-service 70 | type: application 71 | url: https://labs.perplexity.ai/ 72 | user_distribution: '' 73 | -------------------------------------------------------------------------------- /assets/pleias.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: unknown 3 | created_date: 2024-03-20 4 | datasheet: '' 5 | dependencies: [] 6 | description: Common Corpus is the largest public domain dataset released for training 7 | Large Language Models (LLMs). This dataset includes 500 billion words from a diverse 8 | range of cultural heritage initiatives and is the largest corpus in English, French, 9 | Dutch, Spanish, German and Italian. It supports efforts to train fully open LLMs 10 | on sources without copyright concerns. 11 | excluded: The data excluded are those that have copyright issues. 
12 | feedback: unknown 13 | included: The dataset includes 500 billion words from a wide diversity of cultural 14 | heritage initiatives. It also has the largest English-speaking dataset to date 15 | with 180 billion words, including a major US collection of 21 million digitized 16 | newspapers and large monographs datasets collected by digital historian Sebastian 17 | Majstorovic. It also contains a huge volume of data in French (110 billion words), 18 | German (30 billion words), Spanish, Dutch and Italian, as well as data in low-resource 19 | languages that are currently underrepresented. 20 | intended_uses: The dataset is intended to support open and reproducible AI research, 21 | enhancing accessibility, diversity, and democracy in AI by enabling everyone to 22 | explore large models. 23 | license: none 24 | modality: text 25 | monitoring: unknown 26 | name: Common Corpus 27 | nationality: USA 28 | organization: Pleias 29 | prohibited_uses: It should not be used for tasks that infringe on copyright laws. 30 | quality_control: All data included in the corpus are from fully open and auditable 31 | sources, ensuring they are copyright-free. 32 | sample: [] 33 | size: 500 billion words 34 | type: dataset 35 | url: https://huggingface.co/blog/Pclanglais/common-corpus 36 | -------------------------------------------------------------------------------- /assets/portkey.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | adaptation: '' 3 | created_date: 2023-05-06 4 | dependencies: [] 5 | description: Portkey is a hosted middleware that allows users to create generative 6 | AI applications 7 | failures: '' 8 | feedback: '' 9 | intended_uses: '' 10 | license: '' 11 | monitoring: '' 12 | monthly_active_users: '' 13 | name: Portkey 14 | nationality: USA 15 | organization: Portkey 16 | output_space: generative AI apps 17 | prohibited_uses: '' 18 | quality_control: '' 19 | terms_of_service: https://portkey.ai/terms 20 | type: application 21 | url: https://portkey.ai/ 22 | user_distribution: '' 23 | -------------------------------------------------------------------------------- /assets/quizlet.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | adaptation: '' 3 | created_date: 2023-03-01 4 | dependencies: 5 | - ChatGPT API 6 | description: Quizlet is introducing Q-Chat, a fully-adaptive AI tutor that engages 7 | students with adaptive questions based on relevant study materials delivered through 8 | a fun chat experience. 9 | failures: '' 10 | feedback: '' 11 | intended_uses: '' 12 | license: none 13 | monitoring: '' 14 | monthly_active_users: '' 15 | name: Q-Chat 16 | nationality: USA 17 | organization: Quizlet 18 | output_space: '' 19 | prohibited_uses: '' 20 | quality_control: '' 21 | terms_of_service: https://quizlet.com/tos 22 | type: application 23 | url: https://openai.com/blog/introducing-chatgpt-and-whisper-apis 24 | user_distribution: '' 25 | -------------------------------------------------------------------------------- /assets/quora.yaml: -------------------------------------------------------------------------------- 1 | - access: limited 2 | adaptation: '' 3 | created_date: 2023-02-03 4 | dependencies: 5 | - ChatGPT API 6 | - GPT-4 API 7 | - Claude API 8 | - Dragonfly API 9 | - Sage API 10 | description: Poe lets people ask questions, get instant answers, and have back-and-forth 11 | conversations with several AI-powered bots. 
It is initially available on iOS, 12 | with support for all major platforms planned in the next few months, 13 | along with more bots. 14 | failures: '' 15 | feedback: '' 16 | intended_uses: '' 17 | license: none 18 | monitoring: '' 19 | monthly_active_users: '' 20 | name: Poe 21 | nationality: USA 22 | organization: Quora 23 | output_space: '' 24 | prohibited_uses: '' 25 | quality_control: '' 26 | terms_of_service: https://poe.com/tos 27 | type: application 28 | url: https://quorablog.quora.com/Poe-1 29 | user_distribution: '' 30 | -------------------------------------------------------------------------------- /assets/qwen_team.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: Models have been evaluated on a series of math benchmarks, outperforming 3 | state-of-the-art models in both English and 4 | Chinese. 5 | created_date: 2024-08-08 6 | dependencies: [] 7 | description: Qwen2-Math is a series of specialized math language models built upon 8 | the Qwen2 large language models, with a focus on enhancing reasoning and mathematical 9 | capabilities. Their intended use is for solving complex mathematical problems. 10 | They significantly outperform both open-source and closed-source models in terms 11 | of mathematical capabilities. 12 | feedback: Problems with the model should be reported to the Qwen Team via their 13 | official channels. 14 | intended_uses: These models are intended for solving complex mathematical problems. 15 | license: Tongyi Qianwen 16 | modality: text; text 17 | model_card: https://huggingface.co/Qwen/Qwen2-Math-72B 18 | monitoring: unknown 19 | name: Qwen2-Math 20 | nationality: China 21 | organization: Qwen Team 22 | prohibited_uses: Uses that go against the ethical usage policies of Qwen Team. 23 | quality_control: The models were tested with few-shot chain-of-thought prompting 24 | and evaluated across mathematical benchmarks in both English and Chinese. 25 | size: 72B parameters 26 | training_emissions: unknown 27 | training_hardware: unknown 28 | training_time: unknown 29 | type: model 30 | url: https://qwenlm.github.io/blog/qwen2-math/ 31 | -------------------------------------------------------------------------------- /assets/rakuten.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: RakutenAI achieves the highest average score in both Japanese and English 3 | LM-Harness metrics, outperforming other similarly-sized Japanese language models. 4 | created_date: 2024-03-21 5 | dependencies: 6 | - Mistral 7 | description: RakutenAI-7B is a model developed with a focus on Japanese language 8 | understanding. It offers competitive performance on English tests as well. 9 | feedback: https://huggingface.co/Rakuten/RakutenAI-7B/discussions 10 | intended_uses: The model can be used for text generation tasks in both Japanese 11 | and English.
12 | license: Apache 2.0 13 | modality: text; text 14 | model_card: https://huggingface.co/Rakuten/RakutenAI-7B 15 | monitoring: unknown 16 | name: RakutenAI 17 | nationality: Japan 18 | organization: Rakuten 19 | prohibited_uses: unknown 20 | quality_control: unknown 21 | size: 7B parameters 22 | training_emissions: unknown 23 | training_hardware: unknown 24 | training_time: unknown 25 | type: model 26 | url: https://global.rakuten.com/corp/news/press/2024/0321_01.html 27 | -------------------------------------------------------------------------------- /assets/reexpress.yaml: -------------------------------------------------------------------------------- 1 | - access: limited 2 | adaptation: '' 3 | created_date: 2023-03-21 4 | dependencies: [] 5 | description: Reexpress One offers a means of document classification, semantic search, 6 | and uncertainty analysis on-device. 7 | failures: unknown 8 | feedback: https://github.com/ReexpressAI/support 9 | intended_uses: '' 10 | license: unknown 11 | monitoring: unknown 12 | monthly_active_users: unknown 13 | name: Reexpress One 14 | nationality: USA 15 | organization: Reexpress AI 16 | output_space: data analyses 17 | prohibited_uses: '' 18 | quality_control: '' 19 | terms_of_service: https://re.express/tos.html 20 | type: application 21 | url: https://re.express/index.html 22 | user_distribution: unknown 23 | -------------------------------------------------------------------------------- /assets/reka.yaml: -------------------------------------------------------------------------------- 1 | - access: limited 2 | analysis: Evaluated on MMLU, GSM8K, HumanEval, and GPQA benchmarks, among others. 3 | created_date: 2024-02-12 4 | dependencies: [] 5 | description: Reka Flash is a multimodal, multilingual, state-of-the-art 21B model 6 | trained entirely from scratch. 7 | feedback: none 8 | intended_uses: '' 9 | license: unknown 10 | modality: image, text, video; text 11 | model_card: none 12 | monitoring: unknown 13 | name: Reka Flash 14 | nationality: USA 15 | organization: Reka 16 | prohibited_uses: '' 17 | quality_control: '' 18 | size: 21B parameters (dense) 19 | training_emissions: unknown 20 | training_hardware: unknown 21 | training_time: unknown 22 | type: model 23 | url: https://reka.ai/reka-flash-an-efficient-and-capable-multimodal-language-model/ 24 | - access: limited 25 | analysis: Reka Core was evaluated against leading models such as OpenAI's GPT-4, 26 | Claude-3 Opus, and Gemini Ultra on a variety of tasks and metrics including multimodal 27 | and human evaluation conducted by a third party. It was found to be competitive 28 | with or even surpassing these models. 29 | created_date: 2024-04-15 30 | dependencies: [] 31 | description: Reka Core is a frontier-class multimodal language model comparable 32 | to industry leaders. It has powerful capabilities including multimodal understanding 33 | (including images, videos, and audio), superb reasoning abilities, code generation, 34 | and multilinguality with proficiency in 32 languages. 35 | feedback: unknown 36 | intended_uses: Reka Core can be used in e-commerce, social media, digital content 37 | and video games, healthcare, robotics, and other industries for tasks that require 38 | multimodal understanding, coding, complex reasoning, and more.
39 | license: unknown 40 | modality: audio, image, text, video; text 41 | model_card: none 42 | monitoring: unknown 43 | name: Reka Core 44 | nationality: USA 45 | organization: Reka 46 | prohibited_uses: unknown 47 | quality_control: '' 48 | size: unknown 49 | training_emissions: unknown 50 | training_hardware: thousands of GPUs 51 | training_time: few months 52 | type: model 53 | url: https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model 54 | -------------------------------------------------------------------------------- /assets/robin.yaml: -------------------------------------------------------------------------------- 1 | - access: limited 2 | adaptation: '' 3 | created_date: unknown 4 | dependencies: 5 | - Anthropic API 6 | description: Robin AI uses Claude and Anthropic's models to understand language 7 | - including in technical domains like legal language. It's also very confident 8 | at drafting, summarising, translations, and explaining complex concepts in simple 9 | terms 10 | failures: '' 11 | feedback: '' 12 | intended_uses: '' 13 | license: none 14 | monitoring: '' 15 | monthly_active_users: '' 16 | name: Robin AI 17 | nationality: USA 18 | organization: Robin AI 19 | output_space: '' 20 | prohibited_uses: '' 21 | quality_control: '' 22 | terms_of_service: https://www.robinai.co.uk/terms 23 | type: application 24 | url: https://www.robinai.co.uk/ 25 | user_distribution: '' 26 | -------------------------------------------------------------------------------- /assets/runway_ai,_inc..yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: Unknown 3 | created_date: 2024-06-17 4 | dependencies: [] 5 | description: Gen-3 Alpha is a foundation model trained for large-scale multimodal 6 | tasks. It is a major improvement in fidelity, consistency, and motion over the 7 | previous generation, Gen-2. Gen-3 Alpha can power various tools, such as Text 8 | to Video, Image to Video, and Text to Image. The model excels at generating expressive 9 | human characters with a wide range of actions, gestures, and emotions, and is 10 | capable of interpreting a wide range of styles and cinematic terminology. It is 11 | also a step towards building General World Models. It has been designed for use 12 | by research scientists, engineers, and artists, and can be fine-tuned for customization 13 | according to specific stylistic and narrative requirements. 14 | feedback: Companies interested in fine-tuning and custom models can reach out to 15 | Runway AI, Inc. using a form on their website. 16 | intended_uses: Can be used to create expressive human characters, interpret a wide 17 | range of styles and cinematic terminology, and power tools for Text to Video, 18 | Image to Video, and Text to Image tasks. 19 | license: Terms of Use listed on Runway AI, Inc.'s website, specific license unknown 20 | modality: text, image, video; video 21 | model_card: unknown 22 | monitoring: The model includes a new and improved in-house visual moderation system. 23 | name: Gen-3 Alpha 24 | nationality: USA 25 | organization: Runway AI, Inc. 26 | prohibited_uses: Unknown 27 | quality_control: It will be released with a set of new safeguards, including an 28 | improved in-house visual moderation system and C2PA provenance standards. 
29 | size: Unknown 30 | training_emissions: Unknown 31 | training_hardware: Unknown 32 | training_time: Unknown 33 | type: model 34 | url: https://runwayml.com/research/introducing-gen-3-alpha?utm_source=xinquji 35 | -------------------------------------------------------------------------------- /assets/rwkv.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: none 3 | created_date: 2023-05-03 4 | dependencies: [] 5 | description: RWKV World 4 is an RNN with GPT-level LLM performance, which can also 6 | be directly trained like a GPT transformer (parallelizable). 7 | feedback: https://huggingface.co/RWKV/rwkv-4-world-7b/discussions 8 | intended_uses: '' 9 | license: Apache 2.0 10 | modality: text; text 11 | model_card: https://huggingface.co/RWKV/rwkv-4-world-7b 12 | monitoring: unknown 13 | name: RWKV World 4 14 | nationality: China 15 | organization: RWKV 16 | prohibited_uses: '' 17 | quality_control: '' 18 | size: 7B parameters (dense) 19 | training_emissions: unknown 20 | training_hardware: unknown 21 | training_time: unknown 22 | type: model 23 | url: https://huggingface.co/RWKV/rwkv-4-world-7b 24 | - access: open 25 | analysis: none 26 | created_date: 2023-05-15 27 | dependencies: [] 28 | description: RWKV 4 Pile is an RNN with GPT-level LLM performance, which can also 29 | be directly trained like a GPT transformer (parallelizable). 30 | feedback: https://huggingface.co/RWKV/rwkv-4-14b-pile/discussions 31 | intended_uses: '' 32 | license: Apache 2.0 33 | modality: text; text 34 | model_card: https://huggingface.co/RWKV/rwkv-4-14b-pile 35 | monitoring: unknown 36 | name: RWKV 4 Pile 37 | nationality: China 38 | organization: RWKV 39 | prohibited_uses: '' 40 | quality_control: '' 41 | size: 14B parameters (dense) 42 | training_emissions: unknown 43 | training_hardware: unknown 44 | training_time: unknown 45 | type: model 46 | url: https://huggingface.co/RWKV/rwkv-4-14b-pile 47 | - access: open 48 | analysis: none 49 | created_date: 2023-12-16 50 | dependencies: [] 51 | description: RWKV World 5 is an RNN with GPT-level LLM performance, which can also 52 | be directly trained like a GPT transformer (parallelizable). 53 | feedback: https://huggingface.co/RWKV/rwkv-5-world-3b/discussions 54 | intended_uses: '' 55 | license: Apache 2.0 56 | modality: text; text 57 | model_card: https://huggingface.co/RWKV/rwkv-5-world-3b 58 | monitoring: unknown 59 | name: RWKV World 5 60 | nationality: China 61 | organization: RWKV 62 | prohibited_uses: '' 63 | quality_control: '' 64 | size: 3B parameters (dense) 65 | training_emissions: unknown 66 | training_hardware: unknown 67 | training_time: unknown 68 | type: model 69 | url: https://huggingface.co/RWKV/rwkv-5-world-3b 70 | -------------------------------------------------------------------------------- /assets/samba.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: Evaluated on open source multilingual model benchmarks. 3 | created_date: 2024-02-26 4 | dependencies: 5 | - Llama 2 6 | description: SambaLingo is a suite of models that adapt Llama 2 to a diverse set 7 | of 9 languages. 8 | feedback: 9 | explanation: The Arabic language model feedback is given, but there exists one 10 | for each of the other 8 languages in the collection. 
11 | value: https://huggingface.co/sambanovasystems/SambaLingo-Arabic-Base/discussions 12 | intended_uses: '' 13 | license: LLaMA 2 14 | modality: text; text 15 | model_card: 16 | explanation: The Arabic language model card is given, but there exist one for 17 | each of the other 8 languages in the collection. 18 | value: https://huggingface.co/sambanovasystems/SambaLingo-Arabic-Base 19 | monitoring: '' 20 | name: SambaLingo 21 | nationality: USA 22 | organization: Samba Nova Systems 23 | prohibited_uses: SambaLingo should not be used for mission-critical applications, 24 | applications involving the safety of others, and highly critical decisions. 25 | quality_control: '' 26 | size: unknown 27 | training_emissions: unknown 28 | training_hardware: unknown 29 | training_time: unknown 30 | type: model 31 | url: https://sambanova.ai/blog/sambalingo-open-source-language-experts 32 | - access: limited 33 | analysis: unknown 34 | created_date: 2024-02-28 35 | dependencies: 36 | - Llama 2 37 | - Mistral 38 | - Falcon-180B 39 | - Deepseek 40 | - BLOOM 41 | - LLaVA 42 | - CLIP 43 | description: Samba 1 is a trillion parameter generative AI model using a Composition 44 | of Experts architecture. 45 | feedback: none 46 | intended_uses: '' 47 | license: unknown 48 | modality: text; text 49 | model_card: none 50 | monitoring: unknown 51 | name: Samba 1 52 | nationality: USA 53 | organization: Samba Nova Systems 54 | prohibited_uses: '' 55 | quality_control: '' 56 | size: 1T parameters (dense) 57 | training_emissions: unknown 58 | training_hardware: unknown 59 | training_time: unknown 60 | type: model 61 | url: https://sambanova.ai/blog/samba-1-composition-of-experts-mode 62 | -------------------------------------------------------------------------------- /assets/sana.yaml: -------------------------------------------------------------------------------- 1 | - access: limited 2 | adaptation: 'Customized GPT-3, fine-tuned on private data [[Sana GPT-3 Demo]](https://gpt3demo.com/apps/sanalabs). 3 | 4 | ' 5 | created_date: 6 | explanation: 'The company was founded in 2016 according to [[Crunchbase]](https://www.crunchbase.com/organization/sana-2). 7 | It is unclear when Sana adapted the OpenAI API to their products. 8 | 9 | ' 10 | value: unknown 11 | dependencies: 12 | - OpenAI API 13 | description: '"Sana is your all-in-one, AI-assisted, online learning platform (LMS). 14 | Author employee training courses and measure team development with Sana''s powerful 15 | analytics. Sana partners with the world''s most important organizations and fastest-growing 16 | startups to make personalized, adaptive learning available for everyone, everywhere" 17 | [[Sana GPT-3 Demo]](https://gpt3demo.com/apps/sanalabs). 18 | 19 | ' 20 | failures: unknown 21 | feedback: unknown 22 | intended_uses: 'Sana is intended to be used by employers to provide a learning service 23 | for their employees. 
24 | 25 | ' 26 | license: 27 | explanation: "\"Sana Labs grants Subscriber a limited non-exclusive, non-transferable,\ 28 | \ non-sublicensable license to use Sana Labs' web-based personalized and collaborative\ 29 | \ learning platform (\u201CSana Platform\u201D) solely in connection with the\ 30 | \ subscribed Services in accordance with the Agreement.\" - excerpt from the\ 31 | \ Terms of Service.\n" 32 | value: custom 33 | monitoring: unknown 34 | monthly_active_users: unknown 35 | name: Sana 36 | nationality: USA 37 | organization: Sana 38 | output_space: question and answer, summarization, sentiment analysis,topic identification 39 | prohibited_uses: 40 | explanation: '"Sana provides an extensive set of legal documents [[Sana Legal]](https://www.sanalabs.com/legal/), 41 | but missing from the documents are prohibited uses of the Sana platform, beyond 42 | a mention of what is impermissible under relevant law." - exceprt from the Terms 43 | of Service document. 44 | 45 | ' 46 | value: none 47 | quality_control: unknown 48 | terms_of_service: https://www.sanalabs.com/legal/ 49 | type: application 50 | url: https://www.sanalabs.com/ 51 | user_distribution: unknown 52 | -------------------------------------------------------------------------------- /assets/sciphi.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: none 3 | created_date: 2023-11-07 4 | dependencies: 5 | - Mistral 6 | description: SciPhi Mistral is a Large Language Model (LLM) fine-tuned from Mistral. 7 | feedback: https://huggingface.co/SciPhi/SciPhi-Mistral-7B-32k/discussions 8 | intended_uses: '' 9 | license: MIT 10 | modality: text; text 11 | model_card: https://huggingface.co/SciPhi/SciPhi-Mistral-7B-32k 12 | monitoring: unknown 13 | name: SciPhi Mistral 14 | nationality: USA 15 | organization: SciPhi 16 | prohibited_uses: '' 17 | quality_control: '' 18 | size: 7B parameters (dense) 19 | training_emissions: unknown 20 | training_hardware: unknown 21 | training_time: unknown 22 | type: model 23 | url: https://huggingface.co/SciPhi/SciPhi-Mistral-7B-32k 24 | -------------------------------------------------------------------------------- /assets/shop.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | adaptation: '' 3 | created_date: 2023-03-01 4 | dependencies: 5 | - ChatGPT API 6 | description: "When shoppers search for products, the shopping assistant makes personalized\ 7 | \ recommendations based on their requests. Shop\u2019s new AI-powered shopping\ 8 | \ assistant will streamline in-app shopping by scanning millions of products to\ 9 | \ quickly find what buyers are looking for\u2014or help them discover something\ 10 | \ new." 11 | failures: '' 12 | feedback: '' 13 | intended_uses: '' 14 | license: '' 15 | monitoring: '' 16 | monthly_active_users: '' 17 | name: Shop Assistant 18 | nationality: USA 19 | organization: Shop 20 | output_space: '' 21 | prohibited_uses: '' 22 | quality_control: '' 23 | terms_of_service: '' 24 | type: application 25 | url: https://openai.com/blog/introducing-chatgpt-and-whisper-apis 26 | user_distribution: '' 27 | -------------------------------------------------------------------------------- /assets/singapore.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: Performance assessed on BIG-bench arithmetic sub-task, and various elementary 3 | arithmetic tasks. 
4 | created_date: 2023-05-23 5 | dependencies: 6 | - LLaMA 7 | - GOAT dataset 8 | description: GOAT is a fine-tuned LLaMA model which uses the tokenization of numbers 9 | to significantly outperform benchmark standards on a range of arithmetic tasks. 10 | feedback: '' 11 | intended_uses: Integration into other instruction-tuned LLMs to further enhance 12 | arithmetic reasoning abilities in solving math word problems. 13 | license: Apache 2.0 14 | modality: text; text 15 | model_card: none 16 | monitoring: '' 17 | name: GOAT 18 | nationality: Singapore 19 | organization: National University of Singapore 20 | prohibited_uses: '' 21 | quality_control: Number data is randomly generated from log space to reduce likelihood 22 | of redundancy and range of magnitudes. 23 | size: 7B parameters (dense) 24 | training_emissions: unknown 25 | training_hardware: 24 GB VRAM GPU 26 | training_time: unknown 27 | type: model 28 | url: https://arxiv.org/pdf/2305.14201.pdf 29 | - access: open 30 | analysis: Evaluated on relatively simple established benchmarks. 31 | created_date: 2024-01-12 32 | dependencies: 33 | - RedPajama 34 | - The Stack 35 | description: OpenMoE is a series of fully open-sourced and reproducible decoder-only 36 | MoE LLMs. 37 | feedback: https://huggingface.co/OrionZheng/openmoe-base/discussions 38 | intended_uses: '' 39 | license: Apache 2.0 40 | modality: text; text 41 | model_card: https://huggingface.co/OrionZheng/openmoe-base 42 | monitoring: unknown 43 | name: OpenMoE 44 | nationality: unknown 45 | organization: National University of Singapore, University of Edinburgh, ETH Zurich 46 | prohibited_uses: '' 47 | quality_control: unknown 48 | size: 34B parameters (dense) 49 | training_emissions: unknown 50 | training_hardware: unknown 51 | training_time: unknown 52 | type: model 53 | url: https://github.com/XueFuzhao/OpenMoE 54 | -------------------------------------------------------------------------------- /assets/skt.yaml: -------------------------------------------------------------------------------- 1 | - access: closed 2 | analysis: none 3 | created_date: 2023-09-26 4 | dependencies: [] 5 | description: A.X is SK Telecom's proprietary LLM, which has been trained on the 6 | Korean language. 7 | feedback: none 8 | intended_uses: '' 9 | license: unknown 10 | modality: text; text 11 | model_card: none 12 | monitoring: '' 13 | name: A.X 14 | nationality: South Korea 15 | organization: SK Telecom 16 | prohibited_uses: '' 17 | quality_control: '' 18 | size: 39B parameters 19 | training_emissions: unknown 20 | training_hardware: unknown 21 | training_time: unknown 22 | type: model 23 | url: https://www.sktelecom.com/en/press/press_detail.do?idx=1582 24 | -------------------------------------------------------------------------------- /assets/snap.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | adaptation: '' 3 | created_date: 2023-03-01 4 | dependencies: 5 | - ChatGPT API 6 | description: My AI offers Snapchatters a friendly, customizable chatbot at their 7 | fingertips that offers recommendations, and can even write a haiku for friends 8 | in seconds. Snapchat, where communication and messaging is a daily behavior, has 9 | 750 million monthly Snapchatters. 10 | failures: '' 11 | feedback: '' 12 | intended_uses: '' 13 | license: 14 | explanation: '"Snap grants you a worldwide, royalty-free, non-assignable, non-exclusive, revocable, 15 | and non-sublicensable license to use the Services." 
- excerpt from the Terms 16 | of Service document. 17 | 18 | ' 19 | value: custom 20 | monitoring: '' 21 | monthly_active_users: '' 22 | name: My AI for Snapchat 23 | nationality: USA 24 | organization: Snap 25 | output_space: '' 26 | prohibited_uses: '' 27 | quality_control: '' 28 | terms_of_service: https://snap.com/terms 29 | type: application 30 | url: https://openai.com/blog/introducing-chatgpt-and-whisper-apis 31 | user_distribution: '' 32 | -------------------------------------------------------------------------------- /assets/soochow.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: Evaluated across different text benchmarks in English and Chinese. 3 | created_date: 2023-10-01 4 | dependencies: [] 5 | description: OpenBA is an open-sourced 15B bilingual (English + Chinese) asymmetric 6 | seq2seq model. 7 | feedback: https://huggingface.co/OpenBA/OpenBA-LM/discussions 8 | intended_uses: '' 9 | license: Apache 2.0 10 | modality: text; text 11 | model_card: https://huggingface.co/OpenBA/OpenBA-LM 12 | monitoring: none 13 | name: OpenBA 14 | nationality: China 15 | organization: Soochow University 16 | prohibited_uses: '' 17 | quality_control: '' 18 | size: 15B parameters (dense) 19 | training_emissions: 6.5 tCO2eq 20 | training_hardware: 8 NVIDIA A100-80GB GPUs 21 | training_time: 38k GPU hours 22 | type: model 23 | url: https://arxiv.org/pdf/2309.10706.pdf 24 | -------------------------------------------------------------------------------- /assets/speak.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | adaptation: '' 3 | created_date: 2023-03-01 4 | dependencies: 5 | - Whisper API 6 | description: Speak is an AI-powered language learning app focused on building the 7 | best path to spoken fluency and is the fastest-growing English app in South 8 | Korea. 9 | failures: '' 10 | feedback: '' 11 | intended_uses: '' 12 | license: '' 13 | monitoring: '' 14 | monthly_active_users: '' 15 | name: Speak 16 | nationality: USA 17 | organization: Speak 18 | output_space: '' 19 | prohibited_uses: '' 20 | quality_control: '' 21 | terms_of_service: '' 22 | type: application 23 | url: https://openai.com/blog/introducing-chatgpt-and-whisper-apis 24 | user_distribution: '' 25 | -------------------------------------------------------------------------------- /assets/spotify.yaml: -------------------------------------------------------------------------------- 1 | - access: limited 2 | adaptation: '' 3 | created_date: 2023-02-23 4 | dependencies: 5 | - ChatGPT API 6 | - Sonantic AI 7 | description: "The DJ is a personalized AI guide that knows you and your music taste\ 8 | \ so well that it can choose what to play for you. This feature, first rolling\ 9 | \ out in beta, will deliver a curated lineup of music alongside commentary around\ 10 | \ the tracks and artists we think you\u2019ll like in a stunningly realistic voice." 11 | failures: '' 12 | feedback: '' 13 | intended_uses: '' 14 | license: 15 | explanation: '"The Spotify software applications and the Content are licensed, 16 | not sold or transferred to you, and Spotify and its licensors retain ownership 17 | of all copies of the Spotify software applications and Content even after installation 18 | on your Devices." - excerpt from the Terms of Service document.
19 | 20 | ' 21 | value: custom 22 | monitoring: '' 23 | monthly_active_users: '' 24 | name: AI DJ 25 | nationality: Sweden 26 | organization: Spotify 27 | output_space: '' 28 | prohibited_uses: '' 29 | quality_control: '' 30 | terms_of_service: https://www.spotify.com/us/legal/end-user-agreement/ 31 | type: application 32 | url: https://newsroom.spotify.com/2023-02-22/spotify-debuts-a-new-ai-dj-right-in-your-pocket/ 33 | user_distribution: '' 34 | -------------------------------------------------------------------------------- /assets/stonybrook.yaml: -------------------------------------------------------------------------------- 1 | - access: 2 | explanation: The dataset is available for download on the [[SBU Captions Dataset 3 | website]](https://www.cs.rice.edu/~vo9/sbucaptions/), along with additional 4 | resources. 5 | value: open 6 | analysis: 7 | explanation: See [[Section 5]](https://proceedings.neurips.cc/paper/2011/file/5dd9db5e033da9c6fb5ba83c7a7ebea9-Paper.pdf) 8 | value: 'Authors perform two quantitative evaluations for image captioning - direct 9 | user ratings of relevance and BLEU score. They also propose a new evaluation 10 | task: "we propose a new evaluation task where a user is presented with two photographs 11 | and one caption. The user must assign the caption to the most relevant image. 12 | For evaluation we use a query image, a random image and a generated caption." 13 | 14 | ' 15 | created_date: 16 | explanation: 'The date the [[paper]](https://papers.nips.cc/paper_files/paper/2011/hash/5dd9db5e033da9c6fb5ba83c7a7ebea9-Abstract.html) 17 | was published. 18 | 19 | ' 20 | value: 2011-12-12 21 | datasheet: none 22 | dependencies: 23 | - Flickr 24 | description: 'SBU Captions Dataset is a collection of 1 million images and associated 25 | captions from Flickr, filtered so that the descriptions are likely to refer to 26 | visual content. 27 | 28 | ' 29 | excluded: 30 | explanation: See [[Section 2]](https://proceedings.neurips.cc/paper/2011/file/5dd9db5e033da9c6fb5ba83c7a7ebea9-Paper.pdf) 31 | value: '"This produces a very large, but noisy initial set of photographs with 32 | associated text. We filter this set of photos so that the descriptions attached 33 | to a picture are relevant and visually descriptive." 34 | 35 | ' 36 | feedback: '' 37 | included: 38 | explanation: See [[Section 2]](https://proceedings.neurips.cc/paper/2011/file/5dd9db5e033da9c6fb5ba83c7a7ebea9-Paper.pdf) 39 | value: "\"To encourage visual descriptiveness in our collection, we select only\ 40 | \ those images with descriptions of satisfactory length based on observed lengths\ 41 | \ in visual descriptions. We also enforce that retained descriptions contain\ 42 | \ at least 2 words belonging to our term lists and at least one prepositional\ 43 | \ word, e.g. 
\u201Con\u201D, \u201Cunder\u201D which often indicate visible\ 44 | \ spatial relationships.\"\n" 45 | intended_uses: '' 46 | license: none 47 | modality: image, text 48 | monitoring: none 49 | name: SBU Captions 50 | nationality: USA 51 | organization: Stony Brook University 52 | prohibited_uses: '' 53 | quality_control: unknown 54 | sample: [] 55 | size: 1M image-text pairs 56 | type: dataset 57 | url: https://proceedings.neurips.cc/paper/2011/file/5dd9db5e033da9c6fb5ba83c7a7ebea9-Paper.pdf 58 | -------------------------------------------------------------------------------- /assets/suno.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: '' 3 | created_date: 2023-04-20 4 | dependencies: 5 | - AudioLM 6 | description: Bark is a text-to-audio model that can generate multilingual speech 7 | as well as other noises. 8 | feedback: https://huggingface.co/spaces/suno/bark/discussions 9 | intended_uses: '' 10 | license: MIT 11 | modality: text; audio 12 | model_card: https://github.com/suno-ai/bark/blob/main/model-card.md 13 | monitoring: '' 14 | name: Bark 15 | nationality: USA 16 | organization: Suno 17 | prohibited_uses: '' 18 | quality_control: '' 19 | size: '' 20 | training_emissions: unknown 21 | training_hardware: '' 22 | training_time: unknown 23 | type: model 24 | url: https://github.com/suno-ai/bark 25 | -------------------------------------------------------------------------------- /assets/tiger.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: Evaluated on a range of domain tasks across standard benchmarks in 3 | comparison to its predecessor, Llama 2. 4 | created_date: 2023-10-19 5 | dependencies: 6 | explanation: 7, 13, and 70B sizes trained on LLaMA 2. 180B model trained on BLOOM. 7 | value: 8 | - Llama 2 9 | - BLOOM 10 | description: TigerBot is an open-source multilingual multitask LLM. 11 | feedback: https://huggingface.co/TigerResearch/tigerbot-180b-base-v2/discussions 12 | intended_uses: '' 13 | license: 14 | explanation: Users should also consult the BLOOM and Llama 2 open-source licenses due 15 | to pre-training. 16 | value: Apache 2.0 17 | modality: text; text 18 | model_card: https://huggingface.co/TigerResearch/tigerbot-180b-base-v2 19 | monitoring: unknown 20 | name: TigerBot 21 | nationality: International 22 | organization: TigerResearch 23 | prohibited_uses: '' 24 | quality_control: Safety filtering performed to mitigate risk and remove toxic content. 25 | size: 180B parameters (dense) 26 | training_emissions: unknown 27 | training_hardware: 32 A100-40G GPUs 28 | training_time: unknown 29 | type: model 30 | url: https://arxiv.org/pdf/2312.08688.pdf 31 | -------------------------------------------------------------------------------- /assets/toronto.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: Compared models trained on OpenWebMath for 1 epoch to models trained on 3 | The Pile and ProofPile on mathematics benchmarks. 4 | created_date: 2023-10-10 5 | datasheet: Can be found at section E of https://arxiv.org/pdf/2310.06786.pdf 6 | dependencies: 7 | - Common Crawl 8 | description: OpenWebMath is an open dataset containing 14.7B tokens of mathematical 9 | webpages from Common Crawl, inspired by Minerva.
10 | excluded: '' 11 | feedback: https://huggingface.co/datasets/open-web-math/open-web-math/discussions 12 | included: '' 13 | intended_uses: Language model pretraining, finetuning, and evaluation. 14 | license: ODC-By 1.0 15 | modality: text, mathematical tokens 16 | monitoring: none 17 | name: OpenWebMath 18 | nationality: Canada 19 | organization: University of Toronto 20 | prohibited_uses: Any tasks which may be considered irresponsible or harmful. 21 | quality_control: Documents are filtered, processed for mathematical value, deduplicated, 22 | and then the largest documents are manually inspected for quality. 23 | sample: [] 24 | size: 14.7B tokens 25 | type: dataset 26 | url: https://arxiv.org/pdf/2310.06786.pdf 27 | -------------------------------------------------------------------------------- /assets/transformify.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | adaptation: '' 3 | created_date: 2023-05-30 4 | dependencies: 5 | - GPT-4 6 | description: Transformify Automate is a platform for automated task integration 7 | using natural language prompts. 8 | failures: '' 9 | feedback: '' 10 | intended_uses: '' 11 | license: '' 12 | monitoring: '' 13 | monthly_active_users: '' 14 | name: Transformify Automate 15 | nationality: USA 16 | organization: Transformify 17 | output_space: text and code 18 | prohibited_uses: '' 19 | quality_control: '' 20 | terms_of_service: https://www.transformify.ai/legal-stuff 21 | type: application 22 | url: https://www.transformify.ai/automate 23 | user_distribution: '' 24 | -------------------------------------------------------------------------------- /assets/trevor.yaml: -------------------------------------------------------------------------------- 1 | - access: 2 | explanation: 'The tool can only be accessed by the Trevor Project''s would-be counselors 3 | for training purposes. 4 | 5 | ' 6 | value: closed 7 | adaptation: 8 | explanation: 'The base GPT-2 model was fine-tuned on the previous conversations 9 | between new trainees and the Trevor Project counselors posing as a fictional 10 | persona in crisis. 11 | 12 | ' 13 | value: Fine-tuning 14 | created_date: 15 | explanation: 'The release date of the blog post introducing Crisis Contact Simulator 16 | [[Trevor Project Blog]](https://www.thetrevorproject.org/blog/the-trevor-project-launches-new-ai-tool-to-support-crisis-counselor-training/). 17 | 18 | ' 19 | value: 2021-03-24 20 | dependencies: 21 | - OpenAI API 22 | description: 'Crisis Contact Simulator, developed as part of a collaboration with 23 | Google.org, helps train The Trevor Project counselors by mimicking a teen 24 | in crisis. Crisis Contact Simulator is used as part of the training programs for 25 | the Trevor Project''s 24/7 digital crisis services that support LGBTQ youth [[Trevor 26 | Project Blog]](https://www.thetrevorproject.org/blog/the-trevor-project-launches-new-ai-tool-to-support-crisis-counselor-training/). 27 | 28 | ' 29 | failures: unknown 30 | feedback: unknown 31 | intended_uses: 'Training counselors 32 | 33 | ' 34 | license: unknown 35 | monitoring: unknown 36 | monthly_active_users: unknown 37 | name: Crisis Contact Simulator 38 | nationality: USA 39 | organization: The Trevor Project 40 | output_space: Dialogue 41 | prohibited_uses: unknown 42 | quality_control: 43 | explanation: 'The tool can only be accessed by the Trevor Project''s would-be counselors 44 | for training purposes.
45 | 46 | ' 47 | value: Limited release 48 | terms_of_service: unknown 49 | type: application 50 | url: https://www.thetrevorproject.org/ 51 | user_distribution: unknown 52 | -------------------------------------------------------------------------------- /assets/triml.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: Evaluated on standard VLM benchmarks and outperforms SotA open-source 3 | VLMs as of release. 4 | created_date: 2024-02-09 5 | dependencies: [] 6 | description: Prism is a family of VLMs trained using new analyses about key vision 7 | design axes. 8 | feedback: none 9 | intended_uses: '' 10 | license: LLaMA 2 11 | modality: image, text; text 12 | model_card: none 13 | monitoring: unknown 14 | name: Prism 15 | nationality: Japan 16 | organization: Toyota Research Institute 17 | prohibited_uses: '' 18 | quality_control: '' 19 | size: 7B parameters (dense) 20 | training_emissions: unknown 21 | training_hardware: 8 A100 GPUs 22 | training_time: less than 9 hours 23 | type: model 24 | url: https://arxiv.org/pdf/2402.07865.pdf 25 | -------------------------------------------------------------------------------- /assets/twelvelabs.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: Evaluated in comparison to SOTA video-to-language models. 3 | created_date: 2023-10-23 4 | dependencies: 5 | - MSR-VTT 6 | - Video-ChatGPT Video Descriptions Dataset 7 | description: Pegasus-1 is a video-language foundation model. 8 | feedback: none 9 | intended_uses: '' 10 | license: unknown 11 | modality: video; text 12 | model_card: none 13 | monitoring: none 14 | name: Pegasus-1 15 | nationality: USA 16 | organization: Twelve Labs 17 | prohibited_uses: '' 18 | quality_control: Data selected and cleaned to eliminate toxic and biased content. 19 | size: 80B parameters (dense) 20 | training_emissions: unknown 21 | training_hardware: unknown 22 | training_time: unknown 23 | type: model 24 | url: https://app.twelvelabs.io/blog/introducing-pegasus-1 25 | - access: open 26 | analysis: Marengo-2.6 sets new benchmarks in zero-shot text-to-video, text-to-image, 27 | and text-to-audio retrieval tasks with a single embedding model. 
28 | created_date: 2024-03-01 29 | dependencies: [] 30 | description: "Marengo 2.6 is a new state-of-the-art (SOTA) multimodal foundation\ 31 | \ model capable of performing any-to-any search tasks, including Text-To-Video,\ 32 | \ Text-To-Image, Text-To-Audio, Audio-To-Video, Image-To-Video, and more.\_" 33 | feedback: none 34 | intended_uses: '' 35 | license: unknown 36 | modality: audio, image, text; audio, image, video 37 | model_card: none 38 | monitoring: '' 39 | name: Marengo 2.6 40 | nationality: USA 41 | organization: Twelve Labs 42 | prohibited_uses: '' 43 | quality_control: '' 44 | size: unknown 45 | training_emissions: unknown 46 | training_hardware: unknown 47 | training_time: unknown 48 | type: model 49 | url: https://www.twelvelabs.io/blog/introducing-marengo-2-6 50 | -------------------------------------------------------------------------------- /assets/uae.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: Evaluated in 1-shot (1) against the PaLM models, with the tasks of the paper 3 | "Language models are few-shot learners" (Brown et al., 2020); (2) on a small set 4 | of few-shot tasks reported by the GPT-4 paper; (3) against state-of-the-art models 5 | across common sense, question answering, and code tasks; (4) against models which 6 | also report results from the EAI Harness, for which we are able to compare with 7 | identical prompts and metrics. 8 | created_date: 2023-06-14 9 | dependencies: 10 | - RefinedWeb 11 | description: "Falcon-40B is a 40B parameters causal decoder-only model built by\ 12 | \ TII and trained on 1,000B tokens of\_RefinedWeb enhanced with curated corpora." 13 | feedback: https://huggingface.co/tiiuae/falcon-40b/discussions 14 | intended_uses: Research on large language models; as a foundation for further specialization 15 | for specific use cases. 16 | license: Apache 2.0 17 | modality: text; text 18 | model_card: https://huggingface.co/tiiuae/falcon-40b 19 | monitoring: None 20 | name: Falcon-40B 21 | nationality: UAE 22 | organization: UAE Technology Innovation Institute 23 | prohibited_uses: irresponsible or harmful use, or production use without adequate 24 | assessment of risks and mitigation. 25 | quality_control: '' 26 | size: 40B parameters (dense) 27 | training_emissions: unknown 28 | training_hardware: 384 A100 40GB GPUs 29 | training_time: 2 months 30 | type: model 31 | url: https://arxiv.org/pdf/2311.16867.pdf 32 | - access: open 33 | analysis: '' 34 | created_date: 2023-06-01 35 | datasheet: https://huggingface.co/datasets/tiiuae/falcon-refinedweb 36 | dependencies: [] 37 | description: RefinedWeb is a high-quality web-only English pretraining dataset of 38 | five trillion tokens. 39 | excluded: '' 40 | feedback: '' 41 | included: '' 42 | intended_uses: '' 43 | license: 44 | explanation: License can be found at https://huggingface.co/datasets/tiiuae/falcon-refinedweb 45 | value: custom 46 | modality: text 47 | monitoring: '' 48 | name: RefinedWeb 49 | nationality: UAE 50 | organization: UAE Technology Innovation Institute 51 | prohibited_uses: '' 52 | quality_control: '' 53 | sample: [] 54 | size: 600B tokens 55 | type: dataset 56 | url: https://arxiv.org/pdf/2306.01116.pdf 57 | - access: open 58 | analysis: Falcon-180B outperforms LLaMA-2, StableLM, RedPajama, MPT on the Open 59 | LLM Leaderboard at https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard.
60 | created_date: 2023-09-06 61 | dependencies: 62 | - RefinedWeb 63 | description: Falcon-180B is a 180B parameters causal decoder-only model built by 64 | TII and trained on 3,500B tokens of RefinedWeb enhanced with curated corpora. 65 | feedback: https://huggingface.co/tiiuae/falcon-180b/discussions 66 | intended_uses: Research on large language models; as a foundation for further specialization 67 | for specific use cases. 68 | license: unknown 69 | modality: text; text 70 | model_card: https://huggingface.co/tiiuae/falcon-180B 71 | monitoring: None 72 | name: Falcon-180B 73 | nationality: UAE 74 | organization: UAE Technology Innovation Institute 75 | prohibited_uses: Production use without adequate assessment of risks and mitigation; 76 | any use cases which may be considered irresponsible or harmful. 77 | quality_control: '' 78 | size: 180B parameters (dense) 79 | training_emissions: '' 80 | training_hardware: 4096 A100 40GB GPUs 81 | training_time: 9 months 82 | type: model 83 | url: https://arxiv.org/pdf/2311.16867.pdf 84 | -------------------------------------------------------------------------------- /assets/unknown.yaml: -------------------------------------------------------------------------------- 1 | - access: 2 | explanation: producing the currently strongest open-source base model. 3 | value: open 4 | analysis: Comprehensive evaluations reveal that DeepSeek-V3 outperforms other open-source 5 | models and achieves performance comparable to leading closed-source models. 6 | created_date: 2025-01-14 7 | dependencies: 8 | - DeepSeek-R1 9 | description: DeepSeek-V3 is a Mixture-of-Experts (MoE) language model with 671B 10 | total parameters and 37B activated per token. It utilizes Multi-head Latent Attention 11 | (MLA) and adopts innovative strategies for improved performance, such as an auxiliary-loss-free 12 | load balancing and a multi-token prediction training objective. Comprehensive 13 | evaluations show it achieves performance comparable to leading closed-source models. 14 | feedback: unknown 15 | intended_uses: unknown 16 | license: MIT 17 | modality: unknown 18 | model_card: https://huggingface.co/deepseek-ai/DeepSeek-V3 19 | monitoring: unknown 20 | name: DeepSeek-V3 21 | nationality: unknown 22 | organization: DeepSeek 23 | prohibited_uses: unknown 24 | quality_control: Post-training includes knowledge distillation from the DeepSeek-R1 25 | model, incorporating verification and reflection patterns to enhance reasoning 26 | performance. 27 | size: 28 | explanation: a strong Mixture-of-Experts (MoE) language model with 671B total 29 | parameters with 37B activated for each token. 30 | value: 671B parameters (sparse) 31 | training_emissions: unknown 32 | training_hardware: 33 | explanation: DeepSeek-V3 requires only 2.788M H800 GPU hours for its full training. 34 | value: H800 GPUs 35 | training_time: 36 | explanation: DeepSeek-V3 requires only 2.788M H800 GPU hours for its full training. 37 | value: 2.788M GPU hours 38 | type: model 39 | url: https://huggingface.co/deepseek-ai/DeepSeek-V3 40 | - access: open 41 | analysis: '' 42 | created_date: 2024-04-24 43 | dependencies: [] 44 | description: We present a methodology for scaling LLMs called depth up-scaling (DUS) 45 | , which encompasses architectural modifications and continued pretraining. In 46 | other words, we integrated Mistral 7B weights into the upscaled layers, and finally, 47 | continued pre-training for the entire model. SOLAR-10.7B has remarkable performance. 
48 | It outperforms models with up to 30B parameters, even surpassing the recent Mixtral 49 | 8X7B model. For detailed information, please refer to the experimental table. 50 | Solar 10.7B is an ideal choice for fine-tuning. SOLAR-10.7B offers robustness 51 | and adaptability for your fine-tuning needs. Our simple instruction fine-tuning 52 | using the SOLAR-10.7B pre-trained model yields significant performance improvements 53 | (SOLAR-10.7B-Instruct-v1.0). 54 | feedback: https://www.upstage.ai/solar-llm 55 | intended_uses: '' 56 | license: Apache 2.0 57 | modality: text; text 58 | model_card: https://huggingface.co/upstage/SOLAR-10.7B-v1.0 59 | monitoring: '' 60 | name: SOLAR 61 | nationality: South Korea 62 | organization: Upstage.ai 63 | prohibited_uses: '' 64 | quality_control: '' 65 | size: 10.7B parameters 66 | training_emissions: '' 67 | training_hardware: '' 68 | training_time: '' 69 | type: model 70 | url: https://arxiv.org/abs/2312.15166 71 | -------------------------------------------------------------------------------- /assets/uw.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: '' 3 | created_date: 2022-01-07 4 | datasheet: '' 5 | dependencies: 6 | - YouTube 7 | description: '' 8 | excluded: '' 9 | feedback: '' 10 | included: '' 11 | intended_uses: '' 12 | license: MIT 13 | modality: video 14 | monitoring: '' 15 | name: YT-Temporal-1B 16 | nationality: USA 17 | organization: University of Washington 18 | prohibited_uses: '' 19 | quality_control: '' 20 | sample: [] 21 | size: 20M videos 22 | type: dataset 23 | url: https://arxiv.org/abs/2201.02639 24 | -------------------------------------------------------------------------------- /assets/uwashington.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: Reports results on the Vicuna benchmark and compares performance level 3 | and time expenditure with ChatGPT 4 | created_date: 2023-05-23 5 | dependencies: 6 | - QLoRA 7 | - OASST1 8 | description: Guanaco is a model family trained with QLORA, an efficient finetuning 9 | approach that reduces memory usage enough to finetune a 65B parameter model on 10 | a single 48GB GPU while preserving full 16-bit finetuning task performance. 11 | feedback: '' 12 | intended_uses: '' 13 | license: MIT 14 | modality: text; text 15 | model_card: '' 16 | monitoring: '' 17 | name: Guanaco 18 | nationality: USA 19 | organization: University of Washington 20 | prohibited_uses: '' 21 | quality_control: '' 22 | size: 33B parameters (dense) 23 | training_emissions: '' 24 | training_hardware: A single 24 GB GPU 25 | training_time: '' 26 | type: model 27 | url: https://arxiv.org/pdf/2305.14314v1.pdf 28 | - access: open 29 | analysis: Evaluated on benchmark music understanding tasks on SOTA music datasets. 30 | created_date: 2023-10-11 31 | dependencies: 32 | - LLaMA 2 33 | - Jukebox 34 | description: Llark is an instruction-tuned multimodal model for music understanding. 
35 | feedback: none 36 | intended_uses: '' 37 | license: Apache 2.0 38 | modality: audio, text; text 39 | model_card: none 40 | monitoring: '' 41 | name: Llark 42 | nationality: USA 43 | organization: University of Washington, Spotify 44 | prohibited_uses: '' 45 | quality_control: '' 46 | size: 12B parameters (dense) 47 | training_emissions: unknown 48 | training_hardware: 4 80GB NVIDIA A40 GPUs 49 | training_time: 54 hours 50 | type: model 51 | url: https://arxiv.org/pdf/2310.07160.pdf 52 | -------------------------------------------------------------------------------- /assets/vago.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: Evaluated on standard benchmarks in comparison to other German language 3 | models. 4 | created_date: 2023-11-28 5 | dependencies: 6 | - OpenHermes 2.5 Mistral 7 | - OpenOrca Mistral 8 | description: SauerkrautLM is a German language model merged from two Mistral derivatives. 9 | feedback: https://huggingface.co/VAGOsolutions/SauerkrautLM-7b-HerO/discussions 10 | intended_uses: '' 11 | license: Apache 2.0 12 | modality: text; text 13 | model_card: https://huggingface.co/VAGOsolutions/SauerkrautLM-7b-HerO 14 | monitoring: unknown 15 | name: SauerkrautLM 16 | nationality: USA 17 | organization: VAGO Solutions 18 | prohibited_uses: '' 19 | quality_control: '' 20 | size: 7B parameters (dense) 21 | training_emissions: unknown 22 | training_hardware: unknown 23 | training_time: unknown 24 | type: model 25 | url: https://huggingface.co/VAGOsolutions/SauerkrautLM-7b-HerO 26 | -------------------------------------------------------------------------------- /assets/viable.yaml: -------------------------------------------------------------------------------- 1 | - access: limited 2 | adaptation: unknown 3 | created_date: unknown 4 | dependencies: 5 | - OpenAI API 6 | description: 'Viable analyzes qualitative consumer feedback and provides summary 7 | feedback to companies. 8 | 9 | ' 10 | failures: unknown 11 | feedback: 12 | explanation: The feedback mechanism isn't explicitly listed. Company contact information 13 | is provided in the Terms of Service [[Terms of Service]](https://www.askviable.com/terms-of-service). 14 | value: unknown 15 | intended_uses: 'Intended to be used by companies to digest qualitative consumer 16 | feedback. 17 | 18 | ' 19 | license: unknown 20 | monitoring: unknown 21 | monthly_active_users: unknown 22 | name: Viable 23 | nationality: USA 24 | organization: Viable 25 | output_space: Question and answer, summarization, sentiment analysis, topic identification 26 | prohibited_uses: 'Prohibited uses are listed in the Terms of Service [[Terms of 27 | Service]](https://www.askviable.com/terms-of-service). The terms don''t include 28 | statements specific to the use of the content generated by the system or GPT-3. 29 | 30 | ' 31 | quality_control: unknown 32 | terms_of_service: https://www.askviable.com/terms-of-service 33 | type: application 34 | url: https://www.askviable.com/ 35 | user_distribution: unknown 36 | -------------------------------------------------------------------------------- /assets/vilm.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: none 3 | created_date: 2023-10-02 4 | dependencies: 5 | - Falcon-180B 6 | description: Vulture is a further fine-tuned causal Decoder-only LLM built by Virtual 7 | Interactive (VILM) on top of Falcon. 
8 | feedback: https://huggingface.co/vilm/vulture-180b/discussions 9 | intended_uses: '' 10 | license: Apache 2.0 11 | modality: text; text 12 | model_card: https://huggingface.co/vilm/vulture-180b 13 | monitoring: unknown 14 | name: Vulture 15 | nationality: USA 16 | organization: Virtual Interactive 17 | prohibited_uses: Production use without adequate assessment of risks and mitigation; 18 | any use cases which may be considered irresponsible or harmful. 19 | quality_control: '' 20 | size: 180B parameters (dense) 21 | training_emissions: unknown 22 | training_hardware: unknown 23 | training_time: 3000 A100 hours 24 | type: model 25 | url: https://huggingface.co/vilm/vulture-180b 26 | -------------------------------------------------------------------------------- /assets/wayve.yaml: -------------------------------------------------------------------------------- 1 | - access: closed 2 | analysis: Compared to self before being scaled on quality of video generation. 3 | created_date: 2023-09-29 4 | dependencies: [] 5 | description: "GAIA-1 (\u2018Generative AI for Autonomy\u2019) is a generative world\ 6 | \ model that leverages video, text, and action inputs to generate realistic driving\ 7 | \ scenarios while offering fine-grained control over ego-vehicle behavior and\ 8 | \ scene features." 9 | feedback: none 10 | intended_uses: The main use cases are pure representation learning, planning (look-ahead 11 | search), or learning a policy in the world model (neural simulator) 12 | license: unknown 13 | modality: text, video; video 14 | model_card: none 15 | monitoring: none 16 | name: GAIA-1 17 | nationality: UK 18 | organization: Wayve 19 | prohibited_uses: none 20 | quality_control: none 21 | size: 9B parameters (dense) 22 | training_emissions: unknown 23 | training_hardware: 32 A100 80GB GPUs 24 | training_time: 4 days 25 | type: model 26 | url: https://arxiv.org/pdf/2309.17080.pdf 27 | -------------------------------------------------------------------------------- /assets/xverse.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: Evaluated across a range of standard datasets regarding multiple model 3 | capabilities like language comprehension and logical reasoning. 4 | created_date: 2023-11-06 5 | dependencies: [] 6 | description: XVERSE is a multilingual large language model for over 40 languages. 7 | feedback: https://huggingface.co/xverse/XVERSE-65B/discussions 8 | intended_uses: '' 9 | license: 10 | explanation: can be found at https://github.com/xverse-ai/XVERSE-65B/blob/main/MODEL_LICENSE.pdf. 11 | Code license is under Apache 2.0 12 | value: custom 13 | modality: text; text 14 | model_card: https://huggingface.co/xverse/XVERSE-65B 15 | monitoring: unknown 16 | name: XVERSE 17 | nationality: USA 18 | organization: Xverse 19 | prohibited_uses: '' 20 | quality_control: '' 21 | size: 65B parameters (dense) 22 | training_emissions: unknown 23 | training_hardware: unknown 24 | training_time: unknown 25 | type: model 26 | url: https://github.com/xverse-ai/XVERSE-65B 27 | -------------------------------------------------------------------------------- /assets/xwin.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: Evaluated on AlpacaEval benchmark against SOTA LLMs. 
3 | created_date: 2023-09-20 4 | dependencies: [] 5 | description: Xwin-LM is a LLM, which on release, ranked top 1 on AlpacaEval, becoming 6 | the first to surpass GPT-4 on this benchmark. 7 | feedback: https://huggingface.co/Xwin-LM/Xwin-LM-70B-V0.1/discussions 8 | intended_uses: '' 9 | license: LLaMA2 10 | modality: text; text 11 | model_card: https://huggingface.co/Xwin-LM/Xwin-LM-70B-V0.1 12 | monitoring: none 13 | name: Xwin-LM 14 | nationality: USA 15 | organization: Xwin 16 | prohibited_uses: '' 17 | quality_control: '' 18 | size: 70B parameters (dense) 19 | training_emissions: unknown 20 | training_hardware: unknown 21 | training_time: unknown 22 | type: model 23 | url: https://huggingface.co/Xwin-LM/Xwin-LM-70B-V0.1 24 | -------------------------------------------------------------------------------- /assets/yandex.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: '' 3 | created_date: 4 | explanation: The date the blog post about YaLM was published. 5 | value: 2022-06-22 6 | dependencies: 7 | - The Pile 8 | - Yandex Russian Pretraining Dataset 9 | description: YaLM is a 100B parameter autoregressive model trained on 25% English 10 | and 75% Russian text. 11 | feedback: '' 12 | intended_uses: '' 13 | license: 14 | explanation: '' 15 | value: Apache 2.0 16 | modality: text; text 17 | model_card: '' 18 | monitoring: '' 19 | name: YaLM 20 | nationality: Russia 21 | organization: Yandex 22 | prohibited_uses: '' 23 | quality_control: '' 24 | size: 100B parameters (dense) 25 | training_emissions: '' 26 | training_hardware: Yandex 800 A100 Cluster 27 | training_time: '' 28 | type: model 29 | url: https://medium.com/yandex/yandex-publishes-yalm-100b-its-the-largest-gpt-like-neural-network-in-open-source-d1df53d0e9a6 30 | - access: open 31 | adaptation: '' 32 | created_date: 2022-06-23 33 | dependencies: 34 | - YaLM 35 | description: Yandex is a search engine and web portal. Yandex offers internet search 36 | and other services 37 | failures: '' 38 | feedback: '' 39 | intended_uses: '' 40 | license: 41 | explanation: '"3.1. The Rightholder, on the basis of a simple (non-exclusive) 42 | license, shall grant the User a free, worldwide, non-transferable right to use 43 | the Software as set forth below: 44 | 45 | 3.1.1. To use the Software in accordance with its functionality by copying and 46 | installing it on the User''s personal device(s). The User shall be entitled 47 | to install the Software on any number of personal devices. 48 | 49 | 3.1.2. To reproduce and to distribute this Software in an unmodified form and 50 | solely for non-commercial purposes (free of charge)." - excerpt from the Terms 51 | of Service document for browsers. 
52 | 53 | ' 54 | value: custom 55 | monitoring: '' 56 | monthly_active_users: '' 57 | name: Yandex Search 58 | nationality: Russia 59 | organization: Yandex 60 | output_space: '' 61 | prohibited_uses: '' 62 | quality_control: '' 63 | terms_of_service: https://yandex.com/legal/browser_agreement/ 64 | type: application 65 | url: https://medium.com/yandex/yandex-publishes-yalm-100b-its-the-largest-gpt-like-neural-network-in-open-source-d1df53d0e9a6 66 | user_distribution: '' 67 | -------------------------------------------------------------------------------- /assets/you.yaml: -------------------------------------------------------------------------------- 1 | - access: closed 2 | analysis: '' 3 | created_date: unknown 4 | datasheet: '' 5 | dependencies: [] 6 | description: '' 7 | excluded: '' 8 | feedback: '' 9 | included: '' 10 | intended_uses: '' 11 | license: unknown 12 | modality: text 13 | monitoring: '' 14 | name: You dataset 15 | nationality: USA 16 | organization: You 17 | prohibited_uses: '' 18 | quality_control: '' 19 | sample: [] 20 | size: unknown 21 | type: dataset 22 | url: https://you.com/ 23 | - access: closed 24 | analysis: '' 25 | created_date: unknown 26 | dependencies: 27 | - You dataset 28 | description: '' 29 | feedback: '' 30 | intended_uses: '' 31 | license: unknown 32 | modality: text; text 33 | model_card: '' 34 | monitoring: '' 35 | name: You model 36 | nationality: USA 37 | organization: You 38 | prohibited_uses: '' 39 | quality_control: '' 40 | size: unknown 41 | training_emissions: '' 42 | training_hardware: '' 43 | training_time: '' 44 | type: model 45 | url: https://you.com/ 46 | - access: open 47 | adaptation: '' 48 | created_date: unknown 49 | dependencies: 50 | - You model 51 | description: You.com is a search engine built on artificial intelligence that provides 52 | users with a customized search experience while keeping their data 100% private. 53 | failures: '' 54 | feedback: '' 55 | intended_uses: '' 56 | license: unknown 57 | monitoring: '' 58 | monthly_active_users: '' 59 | name: You Search 60 | nationality: USA 61 | organization: You 62 | output_space: '' 63 | prohibited_uses: '' 64 | quality_control: '' 65 | terms_of_service: '' 66 | type: application 67 | url: https://you.com/ 68 | user_distribution: '' 69 | -------------------------------------------------------------------------------- /assets/zhejiang.yaml: -------------------------------------------------------------------------------- 1 | - access: open 2 | analysis: Evaluated on standard and ocean science benchmarks in comparison to other 3 | similar-sized models. 4 | created_date: 2024-02-06 5 | dependencies: [] 6 | description: OceanGPT is the first-ever LLM in the ocean domain and displays expertise 7 | in various ocean science tasks. 8 | feedback: https://huggingface.co/zjunlp/OceanGPT-7b/discussions 9 | intended_uses: '' 10 | license: MIT 11 | modality: text; text 12 | model_card: https://huggingface.co/zjunlp/OceanGPT-7b 13 | monitoring: unknown 14 | name: OceanGPT 15 | nationality: China 16 | organization: Zhejiang University 17 | prohibited_uses: '' 18 | quality_control: '' 19 | size: 7B parameters (dense) 20 | training_emissions: unknown 21 | training_hardware: 6 A800 NVIDIA GPUs 22 | training_time: 7 days 23 | type: model 24 | url: https://www.zjukg.org/project/OceanGPT/ 25 | -------------------------------------------------------------------------------- /components/home.html: -------------------------------------------------------------------------------- 1 |
17 | Foundation models define an emerging ecosystem of datasets, models, and 18 | applications. As their footprint grows, so does their 19 | societal impact. 20 |
21 |22 | Ecosystem graphs is a framework to document the foundation models 23 | ecosystem, namely both the assets (datasets, models, and applications) 24 | and their relationships. Using it, one can answer questions such as: 25 | What are the latest foundation models? Who builds them and where are 26 | they used downstream? What are the general trends over time? We hope 27 | that ecosystem graphs will be a useful resource for researchers, 28 | application developers, policymakers, and the public to better 29 | understand the foundation models ecosystem. 30 |
31 |
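The asset records in the assets/ files above all follow one flat YAML schema (name, organization, dependencies, license, and so on), and each file is simply a list of such records. As a rough illustration of how these records could be loaded and stitched into a dependency graph, here is a minimal Python sketch; it is an assumption for illustration only, not the repository's actual scripts/collate.py, and it only presumes PyYAML and the assets/ layout shown above. Note that some fields appear either as a bare value or wrapped in a mapping with "explanation" and "value" keys, which the small helper below unwraps.

import glob
import yaml  # PyYAML

def field(record, key):
    # Fields may be a bare value or wrapped as {explanation: ..., value: ...}.
    value = record.get(key)
    return value.get("value") if isinstance(value, dict) else value

# Index every asset record by its name.
assets = {}
for path in glob.glob("assets/*.yaml"):
    with open(path) as f:
        for record in yaml.safe_load(f) or []:
            assets[record["name"]] = record

# Each record lists its upstream assets under "dependencies", so the
# ecosystem graph's edges (upstream -> downstream) fall out directly.
edges = [
    (dep, name)
    for name, record in assets.items()
    for dep in (field(record, "dependencies") or [])
]
print(f"{len(assets)} assets, {len(edges)} dependency edges")

Run against the assets/ directory, this would print the number of records and dependency links; anything beyond that (validation against js/schemas.yaml, rendering into the HTML components) is left to the repository's own scripts.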