├── .gitignore
├── LICENSE
├── README.md
├── contributing.md
└── scripts
    ├── generate-star-badges.py
    └── github-markdown-toc


/.gitignore:
--------------------------------------------------------------------------------
1 | README.md.backup
2 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 | Creative Commons Legal Code
  2 | 
  3 | CC0 1.0 Universal
  4 | 
  5 |     CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE
  6 |     LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN
  7 |     ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS
  8 |     INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES
  9 |     REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS
 10 |     PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM
 11 |     THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED
 12 |     HEREUNDER.
 13 | 
 14 | Statement of Purpose
 15 | 
 16 | The laws of most jurisdictions throughout the world automatically confer
 17 | exclusive Copyright and Related Rights (defined below) upon the creator
 18 | and subsequent owner(s) (each and all, an "owner") of an original work of
 19 | authorship and/or a database (each, a "Work").
 20 | 
 21 | Certain owners wish to permanently relinquish those rights to a Work for
 22 | the purpose of contributing to a commons of creative, cultural and
 23 | scientific works ("Commons") that the public can reliably and without fear
 24 | of later claims of infringement build upon, modify, incorporate in other
 25 | works, reuse and redistribute as freely as possible in any form whatsoever
 26 | and for any purposes, including without limitation commercial purposes.
 27 | These owners may contribute to the Commons to promote the ideal of a free
 28 | culture and the further production of creative, cultural and scientific
 29 | works, or to gain reputation or greater distribution for their Work in
 30 | part through the use and efforts of others.
 31 | 
 32 | For these and/or other purposes and motivations, and without any
 33 | expectation of additional consideration or compensation, the person
 34 | associating CC0 with a Work (the "Affirmer"), to the extent that he or she
 35 | is an owner of Copyright and Related Rights in the Work, voluntarily
 36 | elects to apply CC0 to the Work and publicly distribute the Work under its
 37 | terms, with knowledge of his or her Copyright and Related Rights in the
 38 | Work and the meaning and intended legal effect of CC0 on those rights.
 39 | 
 40 | 1. Copyright and Related Rights. A Work made available under CC0 may be
 41 | protected by copyright and related or neighboring rights ("Copyright and
 42 | Related Rights"). Copyright and Related Rights include, but are not
 43 | limited to, the following:
 44 | 
 45 |   i. the right to reproduce, adapt, distribute, perform, display,
 46 |      communicate, and translate a Work;
 47 |  ii. moral rights retained by the original author(s) and/or performer(s);
 48 | iii. publicity and privacy rights pertaining to a person's image or
 49 |      likeness depicted in a Work;
 50 |  iv. rights protecting against unfair competition in regards to a Work,
 51 |      subject to the limitations in paragraph 4(a), below;
 52 |   v. rights protecting the extraction, dissemination, use and reuse of data
 53 |      in a Work;
 54 |  vi. database rights (such as those arising under Directive 96/9/EC of the
 55 |      European Parliament and of the Council of 11 March 1996 on the legal
 56 |      protection of databases, and under any national implementation
 57 |      thereof, including any amended or successor version of such
 58 |      directive); and
 59 | vii. other similar, equivalent or corresponding rights throughout the
 60 |      world based on applicable law or treaty, and any national
 61 |      implementations thereof.
 62 | 
 63 | 2. Waiver. To the greatest extent permitted by, but not in contravention
 64 | of, applicable law, Affirmer hereby overtly, fully, permanently,
 65 | irrevocably and unconditionally waives, abandons, and surrenders all of
 66 | Affirmer's Copyright and Related Rights and associated claims and causes
 67 | of action, whether now known or unknown (including existing as well as
 68 | future claims and causes of action), in the Work (i) in all territories
 69 | worldwide, (ii) for the maximum duration provided by applicable law or
 70 | treaty (including future time extensions), (iii) in any current or future
 71 | medium and for any number of copies, and (iv) for any purpose whatsoever,
 72 | including without limitation commercial, advertising or promotional
 73 | purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each
 74 | member of the public at large and to the detriment of Affirmer's heirs and
 75 | successors, fully intending that such Waiver shall not be subject to
 76 | revocation, rescission, cancellation, termination, or any other legal or
 77 | equitable action to disrupt the quiet enjoyment of the Work by the public
 78 | as contemplated by Affirmer's express Statement of Purpose.
 79 | 
 80 | 3. Public License Fallback. Should any part of the Waiver for any reason
 81 | be judged legally invalid or ineffective under applicable law, then the
 82 | Waiver shall be preserved to the maximum extent permitted taking into
 83 | account Affirmer's express Statement of Purpose. In addition, to the
 84 | extent the Waiver is so judged Affirmer hereby grants to each affected
 85 | person a royalty-free, non transferable, non sublicensable, non exclusive,
 86 | irrevocable and unconditional license to exercise Affirmer's Copyright and
 87 | Related Rights in the Work (i) in all territories worldwide, (ii) for the
 88 | maximum duration provided by applicable law or treaty (including future
 89 | time extensions), (iii) in any current or future medium and for any number
 90 | of copies, and (iv) for any purpose whatsoever, including without
 91 | limitation commercial, advertising or promotional purposes (the
 92 | "License"). The License shall be deemed effective as of the date CC0 was
 93 | applied by Affirmer to the Work. Should any part of the License for any
 94 | reason be judged legally invalid or ineffective under applicable law, such
 95 | partial invalidity or ineffectiveness shall not invalidate the remainder
 96 | of the License, and in such case Affirmer hereby affirms that he or she
 97 | will not (i) exercise any of his or her remaining Copyright and Related
 98 | Rights in the Work or (ii) assert any associated claims and causes of
 99 | action with respect to the Work, in either case contrary to Affirmer's
100 | express Statement of Purpose.
101 | 
102 | 4. Limitations and Disclaimers.
103 | 
104 |  a. No trademark or patent rights held by Affirmer are waived, abandoned,
105 |     surrendered, licensed or otherwise affected by this document.
106 |  b. Affirmer offers the Work as-is and makes no representations or
107 |     warranties of any kind concerning the Work, express, implied,
108 |     statutory or otherwise, including without limitation warranties of
109 |     title, merchantability, fitness for a particular purpose, non
110 |     infringement, or the absence of latent or other defects, accuracy, or
111 |     the present or absence of errors, whether or not discoverable, all to
112 |     the greatest extent permissible under applicable law.
113 |  c. Affirmer disclaims responsibility for clearing rights of other persons
114 |     that may apply to the Work or any use thereof, including without
115 |     limitation any person's Copyright and Related Rights in the Work.
116 |     Further, Affirmer disclaims responsibility for obtaining any necessary
117 |     consents, permissions or other rights required for any use of the
118 |     Work.
119 |  d. Affirmer understands and acknowledges that Creative Commons is not a
120 |     party to this document and has no duty or obligation with respect to
121 |     this CC0 or use of the Work.
122 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | **Awesome LLMOps**
  2 | 
  3 | <a href="https://discord.gg/KqswhpVgdU"><img alt="discord invitation link" src="https://dcbadge.vercel.app/api/server/KqswhpVgdU?style=flat"></a>
  4 | <a href="https://awesome.re"><img src="https://awesome.re/badge-flat2.svg"></a>
  5 | 
  6 | An awesome & curated list of the best LLMOps tools for developers.
  7 | 
  8 | **Contribute**
  9 | 
 10 | Contributions are most welcome; please adhere to the [contribution guidelines](contributing.md).
 11 | 
 12 | # Table of Contents
 13 | 
 14 | - [Table of Contents](#table-of-contents)
 15 | - [Model](#model)
 16 |   - [Large Language Model](#large-language-model)
 17 |   - [CV Foundation Model](#cv-foundation-model)
 18 |   - [Audio Foundation Model](#audio-foundation-model)
 19 | - [Serving](#serving)
 20 |   - [Large Model Serving](#large-model-serving)
 21 |   - [Frameworks/Servers for Serving](#frameworksservers-for-serving)
 22 |   - [Observability](#observability)
 23 | - [LLMOps](#llmops)
 24 | - [Search](#search)
 25 |   - [Vector search](#vector-search)
 26 | - [Code AI](#code-ai)
 27 | - [Training](#training)
 28 |   - [IDEs and Workspaces](#ides-and-workspaces)
 29 |   - [Foundation Model Fine Tuning](#foundation-model-fine-tuning)
 30 |   - [Frameworks for Training](#frameworks-for-training)
 31 |   - [Experiment Tracking](#experiment-tracking)
 32 |   - [Visualization](#visualization)
 33 | - [Data](#data)
 34 |   - [Data Management](#data-management)
 35 |   - [Data Storage](#data-storage)
 36 |   - [Data Tracking](#data-tracking)
 37 |   - [Feature Engineering](#feature-engineering)
 38 |   - [Data/Feature enrichment](#datafeature-enrichment)
 39 | - [Large Scale Deployment](#large-scale-deployment)
 40 |   - [ML Platforms](#ml-platforms)
 41 |   - [Workflow](#workflow)
 42 |   - [Scheduling](#scheduling)
 43 |   - [Model Management](#model-management)
 44 | - [Performance](#performance)
 45 |   - [ML Compiler](#ml-compiler)
 46 |   - [Profiling](#profiling)
 47 | - [AutoML](#automl)
 48 | - [Optimizations](#optimizations)
 49 | - [Federated ML](#federated-ml)
 50 | - [Awesome Lists](#awesome-lists)
 51 | 
 52 | <!-- Created by https://github.com/ekalinin/github-markdown-toc -->
 53 | 
 54 | # Model
 55 | 
 56 | ## Large Language Model
 57 | 
 58 | - [Alpaca](https://github.com/tatsu-lab/stanford_alpaca) ![](https://img.shields.io/github/stars/tatsu-lab/stanford_alpaca.svg?style=social) - Code and documentation to train Stanford's Alpaca models, and generate the data.
 59 | - [BELLE](https://github.com/LianjiaTech/BELLE) ![](https://img.shields.io/github/stars/LianjiaTech/BELLE.svg?style=social) - A 7B Large Language Model fine-tuned on a 34B Chinese Character Corpus, based on LLaMA and Alpaca.
 60 | - [Bloom](https://github.com/bigscience-workshop/model_card) ![](https://img.shields.io/github/stars/bigscience-workshop/model_card.svg?style=social) - BigScience Large Open-science Open-access Multilingual Language Model
 61 | - [dolly](https://github.com/databrickslabs/dolly) ![](https://img.shields.io/github/stars/databrickslabs/dolly.svg?style=social) - Databricks’ Dolly, a large language model trained on the Databricks Machine Learning Platform
 62 | - [Falcon 40B](https://huggingface.co/tiiuae/falcon-40b-instruct) - Falcon-40B-Instruct is a 40B parameters causal decoder-only model built by TII based on Falcon-40B and finetuned on a mixture of Baize. It is made available under the Apache 2.0 license.
 63 | - [FastChat (Vicuna)](https://github.com/lm-sys/FastChat) ![](https://img.shields.io/github/stars/lm-sys/FastChat.svg?style=social) - An open platform for training, serving, and evaluating large language models. Release repo for Vicuna and FastChat-T5.
 64 | - [GLM-6B (ChatGLM)](https://github.com/THUDM/ChatGLM-6B) ![](https://img.shields.io/github/stars/THUDM/ChatGLM-6B.svg?style=social) - An Open Bilingual Pre-Trained Model, quantization of ChatGLM-130B, can run on consumer-level GPUs.
 65 | - [GLM-130B (ChatGLM)](https://github.com/THUDM/GLM-130B) ![](https://img.shields.io/github/stars/THUDM/GLM-130B.svg?style=social) - An Open Bilingual Pre-Trained Model (ICLR 2023)
 66 | - [GPT-NeoX](https://github.com/EleutherAI/gpt-neox) ![](https://img.shields.io/github/stars/EleutherAI/gpt-neox.svg?style=social) - An implementation of model parallel autoregressive transformers on GPUs, based on the DeepSpeed library.
 67 | - [Luotuo](https://github.com/LC1332/Luotuo-Chinese-LLM) ![](https://img.shields.io/github/stars/LC1332/Luotuo-Chinese-LLM.svg?style=social) - A Chinese LLM based on LLaMA and fine-tuned with Stanford Alpaca, Alpaca LoRA, and Japanese-Alpaca-LoRA.
 68 | - [StableLM](https://github.com/Stability-AI/StableLM) ![](https://img.shields.io/github/stars/Stability-AI/StableLM.svg?style=social) - StableLM: Stability AI Language Models
 69 | 
 70 | **[⬆ back to ToC](#table-of-contents)**
 71 | 
 72 | ## CV Foundation Model
 73 | 
 74 | - [disco-diffusion](https://github.com/alembics/disco-diffusion) ![](https://img.shields.io/github/stars/alembics/disco-diffusion.svg?style=social) - A frankensteinian amalgamation of notebooks, models and techniques for the generation of AI Art and Animations.
 75 | - [midjourney](https://www.midjourney.com/home/) - Midjourney is an independent research lab exploring new mediums of thought and expanding the imaginative powers of the human species.
 76 | - [segment-anything (SAM)](https://github.com/facebookresearch/segment-anything) ![](https://img.shields.io/github/stars/facebookresearch/segment-anything.svg?style=social) - produces high quality object masks from input prompts such as points or boxes, and it can be used to generate masks for all objects in an image.
 77 | - [stable-diffusion](https://github.com/CompVis/stable-diffusion) ![](https://img.shields.io/github/stars/CompVis/stable-diffusion.svg?style=social) - A latent text-to-image diffusion model
 78 | - [stable-diffusion v2](https://github.com/Stability-AI/stablediffusion) ![](https://img.shields.io/github/stars/Stability-AI/stablediffusion.svg?style=social) - High-Resolution Image Synthesis with Latent Diffusion Models
 79 | 
 80 | **[⬆ back to ToC](#table-of-contents)**
 81 | 
 82 | ## Audio Foundation Model
 83 | 
 84 | - [bark](https://github.com/suno-ai/bark) ![](https://img.shields.io/github/stars/suno-ai/bark.svg?style=social) - Bark is a transformer-based text-to-audio model created by Suno. Bark can generate highly realistic, multilingual speech as well as other audio - including music, background noise and simple sound effects.
 85 | - [whisper](https://github.com/openai/whisper) ![](https://img.shields.io/github/stars/openai/whisper.svg?style=social) - Robust Speech Recognition via Large-Scale Weak Supervision
 86 | 
 87 | # Serving
 88 | 
 89 | ## Large Model Serving
 90 | 
 91 | - [Alpaca-LoRA-Serve](https://github.com/deep-diver/Alpaca-LoRA-Serve) ![](https://img.shields.io/github/stars/deep-diver/Alpaca-LoRA-Serve.svg?style=social) - Alpaca-LoRA as Chatbot service
 92 | - [DeepSpeed-MII](https://github.com/microsoft/DeepSpeed-MII) ![](https://img.shields.io/github/stars/microsoft/DeepSpeed-MII.svg?style=social) - MII makes low-latency and high-throughput inference possible, powered by DeepSpeed.
 93 | - [FlexGen](https://github.com/FMInference/FlexGen) ![](https://img.shields.io/github/stars/FMInference/FlexGen.svg?style=social) - Running large language models on a single GPU for throughput-oriented scenarios.
 94 | - [Flowise](https://github.com/FlowiseAI/Flowise) ![](https://img.shields.io/github/stars/FlowiseAI/Flowise.svg?style=social) - Drag & drop UI to build your customized LLM flow using LangchainJS.
 95 | - [llama.cpp](https://github.com/ggerganov/llama.cpp) ![](https://img.shields.io/github/stars/ggerganov/llama.cpp.svg?style=social) - Port of Facebook's LLaMA model in C/C++
 96 | - [Modelz-LLM](https://github.com/tensorchord/modelz-llm) ![](https://img.shields.io/github/stars/tensorchord/modelz-llm.svg?style=social) - OpenAI compatible API for LLMs and embeddings (LLaMA, Vicuna, ChatGLM and many others)
 97 | - [whisper.cpp](https://github.com/ggerganov/whisper.cpp) ![](https://img.shields.io/github/stars/ggerganov/whisper.cpp.svg?style=social) - Port of OpenAI's Whisper model in C/C++
 98 | - [x-stable-diffusion](https://github.com/stochasticai/x-stable-diffusion) ![](https://img.shields.io/github/stars/stochasticai/x-stable-diffusion.svg?style=social) - Real-time inference for Stable Diffusion - 0.88s latency. Covers AITemplate, nvFuser, TensorRT, FlashAttention.
 99 | 
100 | **[⬆ back to ToC](#table-of-contents)**
101 | 
102 | ## Frameworks/Servers for Serving
103 | 
104 | - [BentoML](https://github.com/bentoml/BentoML) ![](https://img.shields.io/github/stars/bentoml/BentoML.svg?style=social) - The Unified Model Serving Framework
105 | - [Mosec](https://github.com/mosecorg/mosec) ![](https://img.shields.io/github/stars/mosecorg/mosec?style=social) - A machine learning model serving framework with dynamic batching and pipelined stages, provides an easy-to-use Python interface.
106 | - [TFServing](https://github.com/tensorflow/serving) ![](https://img.shields.io/github/stars/tensorflow/serving.svg?style=social) - A flexible, high-performance serving system for machine learning models.
107 | - [Torchserve](https://github.com/pytorch/serve) ![](https://img.shields.io/github/stars/pytorch/serve.svg?style=social) - Serve, optimize and scale PyTorch models in production
108 | - [Triton Server (TRTIS)](https://github.com/triton-inference-server/server) ![](https://img.shields.io/github/stars/triton-inference-server/server.svg?style=social) - The Triton Inference Server provides an optimized cloud and edge inferencing solution.
109 | - [langchain-serve](https://github.com/jina-ai/langchain-serve) ![](https://img.shields.io/github/stars/jina-ai/langchain-serve.svg?style=social) - Serverless LLM apps on Production with Jina AI Cloud
110 | 
111 | **[⬆ back to ToC](#table-of-contents)**
112 | 
113 | ## Observability
114 | 
115 | - [Deepchecks](https://github.com/deepchecks/deepchecks) ![](https://img.shields.io/github/stars/deepchecks/deepchecks.svg?style=social) - Tests for Continuous Validation of ML Models & Data. Deepchecks is a Python package for comprehensively validating your machine learning models and data with minimal effort.
116 | - [Evidently](https://github.com/evidentlyai/evidently) ![](https://img.shields.io/github/stars/evidentlyai/evidently.svg?style=social) - Evaluate and monitor ML models from validation to production.
117 | - [Great Expectations](https://github.com/great-expectations/great_expectations) ![](https://img.shields.io/github/stars/great-expectations/great_expectations.svg?style=social) - Always know what to expect from your data.
118 | - [whylogs](https://github.com/whylabs/whylogs) ![](https://img.shields.io/github/stars/whylabs/whylogs.svg?style=social) - The open standard for data logging
119 | 
120 | **[⬆ back to ToC](#table-of-contents)**
121 | 
122 | # LLMOps
123 | 
124 | - [Arize-Phoenix](https://github.com/Arize-ai/phoenix) ![](https://img.shields.io/github/stars/Arize-ai/phoenix.svg?style=social) - ML observability for LLMs, vision, language, and tabular models.
125 | - [deeplake](https://github.com/activeloopai/deeplake) ![](https://img.shields.io/github/stars/activeloopai/deeplake.svg?style=social) - Stream large multimodal datasets to achieve near 100% GPU utilization. Query, visualize, & version control data. Access data w/o the need to recompute the embeddings for the model finetuning.
126 | - [GPTCache](https://github.com/zilliztech/GPTCache) ![](https://img.shields.io/github/stars/zilliztech/GPTCache.svg?style=social) - Creating semantic cache to store responses from LLM queries.
127 | - [Haystack](https://github.com/deepset-ai/haystack) ![](https://img.shields.io/github/stars/deepset-ai/haystack.svg?style=social) - Quickly compose applications with LLM Agents, semantic search, question-answering and more.
128 | - [langchain](https://github.com/hwchase17/langchain) ![](https://img.shields.io/github/stars/hwchase17/langchain.svg?style=social) - Building applications with LLMs through composability
129 | - [LangFlow](https://github.com/logspace-ai/langflow) ![](https://img.shields.io/github/stars/logspace-ai/langflow.svg?style=social) - An effortless way to experiment and prototype LangChain flows with drag-and-drop components and a chat interface.
130 | - [LlamaIndex](https://github.com/jerryjliu/llama_index) ![](https://img.shields.io/github/stars/jerryjliu/llama_index.svg?style=social) - Provides a central interface to connect your LLMs with external data.
131 | - [promptfoo](https://github.com/typpo/promptfoo) ![](https://img.shields.io/github/stars/typpo/promptfoo.svg?style=social) - Open-source tool for testing & evaluating prompt quality. Create test cases, automatically check output quality and catch regressions, and reduce evaluation cost.
132 | - [Weights & Biases (Prompts)](https://docs.wandb.ai/guides/prompts) - A suite of LLMOps tools within the developer-first W&B MLOps platform. Utilize W&B Prompts for visualizing and inspecting LLM execution flow, tracking inputs and outputs, viewing intermediate results, securely managing prompts and LLM chain configurations.
133 | - [xTuring](https://github.com/stochasticai/xturing) ![](https://img.shields.io/github/stars/stochasticai/xturing.svg?style=social) - Build and control your personal LLMs with fast and efficient fine-tuning.
134 | - [ZenML](https://github.com/zenml-io/zenml) ![](https://img.shields.io/github/stars/zenml-io/zenml.svg?style=social) - Open-source framework for orchestrating, experimenting and deploying production-grade ML solutions, with built-in `langchain` & `llama_index` integrations.
135 | - [Dify](https://github.com/langgenius/dify) ![](https://img.shields.io/github/stars/langgenius/dify.svg?style=social) - An open-source framework that aims to enable developers (and even non-developers) to quickly build useful applications based on large language models, ensuring they are visual, operable, and improvable.
136 | 
137 | **[⬆ back to ToC](#table-of-contents)**
138 | 
139 | # Search
140 | 
141 | ## Vector search
142 | 
143 | - [AquilaDB](https://github.com/Aquila-Network/AquilaDB) ![](https://img.shields.io/github/stars/Aquila-Network/AquilaDB.svg?style=social) - An easy to use Neural Search Engine. Index latent vectors along with JSON metadata and do efficient k-NN search.
144 | - [Chroma](https://github.com/chroma-core/chroma) ![](https://img.shields.io/github/stars/chroma-core/chroma.svg?style=social) - the open source embedding database
145 | - [Jina](https://github.com/jina-ai/jina) ![](https://img.shields.io/github/stars/jina-ai/jina.svg?style=social) - Build multimodal AI services via cloud native technologies · Neural Search · Generative AI · Cloud Native
146 | - [Marqo](https://github.com/marqo-ai/marqo) ![](https://img.shields.io/github/stars/marqo-ai/marqo.svg?style=social) - Tensor search for humans.
147 | - [Milvus](https://github.com/milvus-io/milvus) ![](https://img.shields.io/github/stars/milvus-io/milvus.svg?style=social) - Vector database for scalable similarity search and AI applications.
148 | - [Pinecone](https://www.pinecone.io/) - The Pinecone vector database makes it easy to build high-performance vector search applications. Developer-friendly, fully managed, and easily scalable without infrastructure hassles.
149 | - [pgvector](https://github.com/pgvector/pgvector) ![](https://img.shields.io/github/stars/pgvector/pgvector.svg?style=social) - Open-source vector similarity search for Postgres.
150 | - [pgvecto.rs](https://github.com/tensorchord/pgvecto.rs) ![](https://img.shields.io/github/stars/tensorchord/pgvecto.rs.svg?style=social) - Vector database plugin for Postgres, written in Rust, specifically designed for LLM.
151 | - [Qdrant](https://github.com/qdrant/qdrant) ![](https://img.shields.io/github/stars/qdrant/qdrant.svg?style=social) - Vector Search Engine and Database for the next generation of AI applications. Also available in the cloud
152 | - [txtai](https://github.com/neuml/txtai) ![](https://img.shields.io/github/stars/neuml/txtai.svg?style=social) - Build AI-powered semantic search applications
153 | - [Vald](https://github.com/vdaas/vald) ![](https://img.shields.io/github/stars/vdaas/vald.svg?style=social) - A Highly Scalable Distributed Vector Search Engine
154 | - [Vearch](https://github.com/vearch/vearch) ![](https://img.shields.io/github/stars/vearch/vearch.svg?style=social) - A distributed system for embedding-based vector retrieval
155 | - [Weaviate](https://github.com/semi-technologies/weaviate) ![](https://img.shields.io/github/stars/semi-technologies/weaviate.svg?style=social) - Weaviate is an open source vector search engine that stores both objects and vectors, allowing for combining vector search with structured filtering with the fault-tolerance and scalability of a cloud-native database, all accessible through GraphQL, REST, and various language clients.
156 | 
157 | **[⬆ back to ToC](#table-of-contents)**
158 | 
159 | # Code AI
160 | 
161 | - [CodeGen](https://github.com/salesforce/CodeGen) ![](https://img.shields.io/github/stars/salesforce/CodeGen.svg?style=social) - CodeGen is an open-source model for program synthesis. Trained on TPU-v4. Competitive with OpenAI Codex.
162 | - [CodeT5](https://github.com/salesforce/CodeT5) ![](https://img.shields.io/github/stars/salesforce/CodeT5.svg?style=social) - Open Code LLMs for Code Understanding and Generation.
163 | - [fauxpilot](https://github.com/fauxpilot/fauxpilot) ![](https://img.shields.io/github/stars/fauxpilot/fauxpilot.svg?style=social) - An open-source alternative to GitHub Copilot server
164 | - [tabby](https://github.com/TabbyML/tabby) ![](https://img.shields.io/github/stars/TabbyML/tabby.svg?style=social) - Self-hosted AI coding assistant. An open-source / on-prem alternative to GitHub Copilot.
165 | 
166 | # Training
167 | 
168 | ## IDEs and Workspaces
169 | 
170 | - [code server](https://github.com/coder/code-server) ![](https://img.shields.io/github/stars/coder/code-server.svg?style=social) - Run VS Code on any machine anywhere and access it in the browser.
171 | - [conda](https://github.com/conda/conda) ![](https://img.shields.io/github/stars/conda/conda.svg?style=social) - OS-agnostic, system-level binary package manager and ecosystem.
172 | - [Docker](https://github.com/moby/moby) ![](https://img.shields.io/github/stars/moby/moby.svg?style=social) - Moby is an open-source project created by Docker to enable and accelerate software containerization.
173 | - [envd](https://github.com/tensorchord/envd) ![](https://img.shields.io/github/stars/tensorchord/envd.svg?style=social) - 🏕️ Reproducible development environment for AI/ML.
174 | - [Jupyter Notebooks](https://github.com/jupyter/notebook) ![](https://img.shields.io/github/stars/jupyter/notebook.svg?style=social) - The Jupyter notebook is a web-based notebook environment for interactive computing.
175 | - [Kurtosis](https://github.com/kurtosis-tech/kurtosis) ![](https://img.shields.io/github/stars/kurtosis-tech/kurtosis.svg?style=social) - A build, packaging, and run system for ephemeral multi-container environments.
176 | 
177 | **[⬆ back to ToC](#table-of-contents)**
178 | 
179 | ## Foundation Model Fine Tuning
180 | 
181 | - [alpaca-lora](https://github.com/tloen/alpaca-lora) ![](https://img.shields.io/github/stars/tloen/alpaca-lora.svg?style=social) - Instruct-tune LLaMA on consumer hardware
182 | - [LMFlow](https://github.com/OptimalScale/LMFlow) ![](https://img.shields.io/github/stars/OptimalScale/LMFlow.svg?style=social) - An Extensible Toolkit for Finetuning and Inference of Large Foundation Models
183 | - [Lora](https://github.com/cloneofsimo/lora) ![](https://img.shields.io/github/stars/cloneofsimo/lora.svg?style=social) - Using Low-rank adaptation to quickly fine-tune diffusion models.
184 | - [peft](https://github.com/huggingface/peft) ![](https://img.shields.io/github/stars/huggingface/peft.svg?style=social) - State-of-the-art Parameter-Efficient Fine-Tuning.
185 | - [p-tuning-v2](https://github.com/THUDM/P-tuning-v2) ![](https://img.shields.io/github/stars/THUDM/P-tuning-v2.svg?style=social) - An optimized prompt tuning strategy achieving comparable performance to fine-tuning on small/medium-sized models and sequence tagging challenges. [(ACL 2022)](https://arxiv.org/abs/2110.07602)
186 | - [QLoRA](https://github.com/artidoro/qlora) ![](https://img.shields.io/github/stars/artidoro/qlora.svg?style=social) - Efficient finetuning approach that reduces memory usage enough to finetune a 65B parameter model on a single 48GB GPU while preserving full 16-bit finetuning task performance.
187 | 
188 | **[⬆ back to ToC](#table-of-contents)**
189 | 
190 | ## Frameworks for Training
191 | 
192 | - [Accelerate](https://github.com/huggingface/accelerate) ![](https://img.shields.io/github/stars/huggingface/accelerate.svg?style=social) - 🚀 A simple way to train and use PyTorch models with multi-GPU, TPU, mixed-precision.
193 | - [Apache MXNet](https://github.com/apache/mxnet) ![](https://img.shields.io/github/stars/apache/mxnet.svg?style=social) - Lightweight, Portable, Flexible Distributed/Mobile Deep Learning with Dynamic, Mutation-aware Dataflow Dep Scheduler.
194 | - [Caffe](https://github.com/BVLC/caffe) ![](https://img.shields.io/github/stars/BVLC/caffe.svg?style=social) - A fast open framework for deep learning.
195 | - [ColossalAI](https://github.com/hpcaitech/ColossalAI) ![](https://img.shields.io/github/stars/hpcaitech/ColossalAI.svg?style=social) - An integrated large-scale model training system with efficient parallelization techniques.
196 | - [DeepSpeed](https://github.com/microsoft/DeepSpeed) ![](https://img.shields.io/github/stars/microsoft/DeepSpeed.svg?style=social) - DeepSpeed is a deep learning optimization library that makes distributed training and inference easy, efficient, and effective.
197 | - [Horovod](https://github.com/horovod/horovod) ![](https://img.shields.io/github/stars/horovod/horovod.svg?style=social) - Distributed training framework for TensorFlow, Keras, PyTorch, and Apache MXNet.
198 | - [Jax](https://github.com/google/jax) ![](https://img.shields.io/github/stars/google/jax.svg?style=social) - Autograd and XLA for high-performance machine learning research.
199 | - [Kedro](https://github.com/kedro-org/kedro) ![](https://img.shields.io/github/stars/kedro-org/kedro.svg?style=social) - Kedro is an open-source Python framework for creating reproducible, maintainable and modular data science code.
200 | - [Keras](https://github.com/keras-team/keras) ![](https://img.shields.io/github/stars/keras-team/keras.svg?style=social) - Keras is a deep learning API written in Python, running on top of the machine learning platform TensorFlow.
201 | - [LightGBM](https://github.com/microsoft/LightGBM) ![](https://img.shields.io/github/stars/microsoft/LightGBM.svg?style=social) - A fast, distributed, high performance gradient boosting (GBT, GBDT, GBRT, GBM or MART) framework based on decision tree algorithms, used for ranking, classification and many other machine learning tasks.
202 | - [MegEngine](https://github.com/MegEngine/MegEngine) ![](https://img.shields.io/github/stars/MegEngine/MegEngine.svg?style=social) - MegEngine is a fast, scalable and easy-to-use deep learning framework, with auto-differentiation.
203 | - [metric-learn](https://github.com/scikit-learn-contrib/metric-learn) ![](https://img.shields.io/github/stars/scikit-learn-contrib/metric-learn.svg?style=social) - Metric Learning Algorithms in Python.
204 | - [MindSpore](https://github.com/mindspore-ai/mindspore) ![](https://img.shields.io/github/stars/mindspore-ai/mindspore.svg?style=social) - MindSpore is a new open source deep learning training/inference framework that could be used for mobile, edge and cloud scenarios.
205 | - [Oneflow](https://github.com/Oneflow-Inc/oneflow) ![](https://img.shields.io/github/stars/Oneflow-Inc/oneflow.svg?style=social) - OneFlow is a performance-centered and open-source deep learning framework.
206 | - [PaddlePaddle](https://github.com/PaddlePaddle/Paddle) ![](https://img.shields.io/github/stars/PaddlePaddle/Paddle.svg?style=social) - Machine Learning Framework from Industrial Practice.
207 | - [PyTorch](https://github.com/pytorch/pytorch) ![](https://img.shields.io/github/stars/pytorch/pytorch.svg?style=social) - Tensors and Dynamic neural networks in Python with strong GPU acceleration.
208 | - [PyTorchLightning](https://github.com/PyTorchLightning/pytorch-lightning) ![](https://img.shields.io/github/stars/PyTorchLightning/pytorch-lightning.svg?style=social) - The lightweight PyTorch wrapper for high-performance AI research. Scale your models, not the boilerplate.
209 | - [XGBoost](https://github.com/dmlc/xgboost) ![](https://img.shields.io/github/stars/dmlc/xgboost.svg?style=social) - Scalable, Portable and Distributed Gradient Boosting (GBDT, GBRT or GBM) Library.
210 | - [scikit-learn](https://github.com/scikit-learn/scikit-learn) ![](https://img.shields.io/github/stars/scikit-learn/scikit-learn.svg?style=social) - Machine Learning in Python.
211 | - [TensorFlow](https://github.com/tensorflow/tensorflow) ![](https://img.shields.io/github/stars/tensorflow/tensorflow.svg?style=social) - An Open Source Machine Learning Framework for Everyone.
212 | - [VectorFlow](https://github.com/Netflix/vectorflow) ![](https://img.shields.io/github/stars/Netflix/vectorflow.svg?style=social) - A minimalist neural network library optimized for sparse data and single machine environments.
213 | 
214 | **[⬆ back to ToC](#table-of-contents)**
215 | 
216 | ## Experiment Tracking
217 | 
218 | - [Aim](https://github.com/aimhubio/aim) ![](https://img.shields.io/github/stars/aimhubio/aim.svg?style=social) - an easy-to-use and performant open-source experiment tracker.
219 | - [ClearML](https://github.com/allegroai/clearml) ![](https://img.shields.io/github/stars/allegroai/clearml.svg?style=social) - Auto-Magical CI/CD to streamline your ML workflow. Experiment Manager, MLOps and Data-Management.
220 | - [Guild AI](https://github.com/guildai/guildai) ![](https://img.shields.io/github/stars/guildai/guildai.svg?style=social) - Experiment tracking, ML developer tools.
221 | - [MLRun](https://github.com/mlrun/mlrun) ![](https://img.shields.io/github/stars/mlrun/mlrun.svg?style=social) - Machine Learning automation and tracking.
222 | - [Kedro-Viz](https://github.com/kedro-org/kedro-viz) ![](https://img.shields.io/github/stars/kedro-org/kedro-viz.svg?style=social) - Kedro-Viz is an interactive development tool for building data science pipelines with Kedro. Kedro-Viz also allows users to view and compare different runs in the Kedro project.
223 | - [LabNotebook](https://github.com/henripal/labnotebook) ![](https://img.shields.io/github/stars/henripal/labnotebook.svg?style=social) - LabNotebook is a tool that allows you to flexibly monitor, record, save, and query all your machine learning experiments.
224 | - [Sacred](https://github.com/IDSIA/sacred) ![](https://img.shields.io/github/stars/IDSIA/sacred.svg?style=social) - Sacred is a tool to help you configure, organize, log and reproduce experiments.
225 | - [Weights & Biases](https://github.com/wandb/wandb) ![](https://img.shields.io/github/stars/wandb/wandb.svg?style=social) - A developer first, lightweight, user-friendly experiment tracking and visualization tool for machine learning projects, streamlining collaboration and simplifying MLOps. W&B excels at tracking LLM-powered applications, featuring W&B Prompts for LLM execution flow visualization, input and output monitoring, and secure management of prompts and LLM chain configurations.
226 | 
227 | **[⬆ back to ToC](#table-of-contents)**
228 | 
229 | ## Visualization
230 | 
231 | - [Manifold](https://github.com/uber/manifold) ![](https://img.shields.io/github/stars/uber/manifold.svg?style=social) - A model-agnostic visual debugging tool for machine learning.
232 | - [netron](https://github.com/lutzroeder/netron) ![](https://img.shields.io/github/stars/lutzroeder/netron.svg?style=social) - Visualizer for neural network, deep learning, and machine learning models.
233 | - [OpenOps](https://github.com/ThePlugJumbo/openops) ![](https://img.shields.io/github/stars/theplugjumbo/openops.svg?style=social) - Bring multiple data streams into one dashboard.
234 | - [TensorBoard](https://github.com/tensorflow/tensorboard) ![](https://img.shields.io/github/stars/tensorflow/tensorboard.svg?style=social) - TensorFlow's Visualization Toolkit.
235 | - [TensorSpace](https://github.com/tensorspace-team/tensorspace) ![](https://img.shields.io/github/stars/tensorspace-team/tensorspace.svg?style=social) - Neural network 3D visualization framework, build interactive and intuitive model in browsers, support pre-trained deep learning models from TensorFlow, Keras, TensorFlow.js.
236 | - [dtreeviz](https://github.com/parrt/dtreeviz) ![](https://img.shields.io/github/stars/parrt/dtreeviz.svg?style=social) - A python library for decision tree visualization and model interpretation.
237 | - [Zetane Viewer](https://github.com/zetane/viewer) ![](https://img.shields.io/github/stars/zetane/viewer.svg?style=social) - ML models and internal tensors 3D visualizer.
238 | - [Zeno](https://github.com/zeno-ml/zeno) ![](https://img.shields.io/github/stars/zeno-ml/zeno.svg?style=social) - AI evaluation platform for interactively exploring data and model outputs.
239 | 
240 | **[⬆ back to ToC](#table-of-contents)**
241 | 
242 | # Data
243 | 
244 | ## Data Management
245 | 
246 | - [ArtiVC](https://github.com/InfuseAI/ArtiVC) ![](https://img.shields.io/github/stars/InfuseAI/ArtiVC.svg?style=social) - A version control system to manage large files.
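247 | - [Deep Lake](https://github.com/activeloopai/deeplake) ![](https://img.shields.io/github/stars/activeloopai/deeplake.svg?style=social) - Deep Lake is a dataset format with a simple API for creating, storing, and collaborating on AI datasets of any size.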
248 | - [Dolt](https://github.com/dolthub/dolt) ![](https://img.shields.io/github/stars/dolthub/dolt.svg?style=social) - Git for Data.
249 | - [DVC](https://github.com/iterative/dvc) ![](https://img.shields.io/github/stars/iterative/dvc.svg?style=social) - Data Version Control | Git for Data & Models | ML Experiments Management.
250 | - [Delta-Lake](https://github.com/delta-io/delta) ![](https://img.shields.io/github/stars/delta-io/delta.svg?style=social) - Storage layer that brings scalable, ACID transactions to Apache Spark and other engines.
251 | - [Pachyderm](https://github.com/pachyderm/pachyderm) ![](https://img.shields.io/github/stars/pachyderm/pachyderm.svg?style=social) - Pachyderm is a version control system for data.
252 | - [Quilt](https://github.com/quiltdata/quilt) ![](https://img.shields.io/github/stars/quiltdata/quilt.svg?style=social) - A self-organizing data hub for S3.
253 | 
254 | **[⬆ back to ToC](#table-of-contents)**
255 | 
256 | ## Data Storage
257 | 
258 | - [JuiceFS](https://github.com/juicedata/juicefs) ![](https://img.shields.io/github/stars/juicedata/juicefs.svg?style=social) - A distributed POSIX file system built on top of Redis and S3.
259 | - [LakeFS](https://github.com/treeverse/lakeFS) ![](https://img.shields.io/github/stars/treeverse/lakeFS.svg?style=social) - Git-like capabilities for your object storage.
260 | - [Lance](https://github.com/eto-ai/lance) ![](https://img.shields.io/github/stars/eto-ai/lance.svg?style=social) - Modern columnar data format for ML implemented in Rust.
261 | 
262 | **[⬆ back to ToC](#table-of-contents)**
263 | 
264 | ## Data Tracking
265 | 
266 | - [Piperider](https://github.com/InfuseAI/piperider) ![](https://img.shields.io/github/stars/InfuseAI/piperider.svg?style=social) - A CLI tool that allows you to build data profiles and write assertion tests for easily evaluating and tracking your data's reliability over time.
267 | - [LUX](https://github.com/lux-org/lux) ![](https://img.shields.io/github/stars/lux-org/lux.svg?style=social) - A Python library that facilitates fast and easy data exploration by automating the visualization and data analysis process.
268 | 
269 | **[⬆ back to ToC](#table-of-contents)**
270 | 
271 | ## Feature Engineering
272 | 
273 | - [Featureform](https://github.com/featureform/featureform) ![](https://img.shields.io/github/stars/featureform/featureform.svg?style=social) - The Virtual Feature Store. Turn your existing data infrastructure into a feature store.
274 | - [FeatureTools](https://github.com/Featuretools/featuretools) ![](https://img.shields.io/github/stars/Featuretools/featuretools.svg?style=social) - An open source python framework for automated feature engineering
275 | 
276 | **[⬆ back to ToC](#table-of-contents)**
277 | 
278 | ## Data/Feature enrichment
279 | 
280 | - [Upgini](https://github.com/upgini/upgini) ![](https://img.shields.io/github/stars/upgini/upgini.svg?style=social) - Free automated data & feature enrichment library for machine learning: automatically searches through thousands of ready-to-use features from public and community shared data sources and enriches your training dataset with only the accuracy improving features
281 | - [Feast](https://github.com/feast-dev/feast) ![](https://img.shields.io/github/stars/feast-dev/feast.svg?style=social) - An open source feature store for machine learning.
282 | 
283 | **[⬆ back to ToC](#table-of-contents)**
284 | 
285 | # Large Scale Deployment
286 | 
287 | ## ML Platforms
288 | 
289 | - [ClearML](https://github.com/allegroai/clearml) ![](https://img.shields.io/github/stars/allegroai/clearml.svg?style=social) - Auto-Magical CI/CD to streamline your ML workflow. Experiment Manager, MLOps and Data-Management.
290 | - [MLflow](https://github.com/mlflow/mlflow) ![](https://img.shields.io/github/stars/mlflow/mlflow.svg?style=social) - Open source platform for the machine learning lifecycle.
291 | - [MLRun](https://github.com/mlrun/mlrun) ![](https://img.shields.io/github/stars/mlrun/mlrun.svg?style=social) - An open MLOps platform for quickly building and managing continuous ML applications across their lifecycle.
292 | - [ModelFox](https://github.com/modelfoxdotdev/modelfox) ![](https://img.shields.io/github/stars/modelfoxdotdev/modelfox.svg?style=social) - ModelFox is a platform for managing and deploying machine learning models.
293 | - [Kserve](https://github.com/kserve/kserve) ![](https://img.shields.io/github/stars/kserve/kserve.svg?style=social) - Standardized Serverless ML Inference Platform on Kubernetes
294 | - [Kubeflow](https://github.com/kubeflow/kubeflow) ![](https://img.shields.io/github/stars/kubeflow/kubeflow.svg?style=social) - Machine Learning Toolkit for Kubernetes.
295 | - [PAI](https://github.com/microsoft/pai) ![](https://img.shields.io/github/stars/microsoft/pai.svg?style=social) - Resource scheduling and cluster management for AI.
296 | - [Polyaxon](https://github.com/polyaxon/polyaxon) ![](https://img.shields.io/github/stars/polyaxon/polyaxon.svg?style=social) - Machine Learning Management & Orchestration Platform.
297 | - [Primehub](https://github.com/InfuseAI/primehub) ![](https://img.shields.io/github/stars/InfuseAI/primehub.svg?style=social) - An effortless infrastructure for machine learning built on the top of Kubernetes.
298 | - [Seldon-core](https://github.com/SeldonIO/seldon-core) ![](https://img.shields.io/github/stars/SeldonIO/seldon-core.svg?style=social) - An MLOps framework to package, deploy, monitor and manage thousands of production machine learning models
299 | - [Weights & Biases](https://github.com/wandb/wandb) ![](https://img.shields.io/github/stars/wandb/wandb.svg?style=social) - A lightweight and flexible platform for machine learning experiment tracking, dataset versioning, and model management, enhancing collaboration and streamlining MLOps workflows. W&B excels at tracking LLM-powered applications, featuring W&B Prompts for LLM execution flow visualization, input and output monitoring, and secure management of prompts and LLM chain configurations.
300 | 
301 | **[⬆ back to ToC](#table-of-contents)**
302 | 
303 | ## Workflow
304 | 
305 | - [Airflow](https://airflow.apache.org/) ![](https://img.shields.io/github/stars/apache/airflow?style=social) - A platform to programmatically author, schedule and monitor workflows.
306 | - [aqueduct](https://github.com/aqueducthq/aqueduct) ![](https://img.shields.io/github/stars/aqueducthq/aqueduct.svg?style=social) - An Open-Source Platform for Production Data Science
307 | - [Argo Workflows](https://github.com/argoproj/argo-workflows) ![](https://img.shields.io/github/stars/argoproj/argo-workflows.svg?style=social) - Workflow engine for Kubernetes.
308 | - [Flyte](https://github.com/flyteorg/flyte) ![](https://img.shields.io/github/stars/flyteorg/flyte.svg?style=social) - Kubernetes-native workflow automation platform for complex, mission-critical data and ML processes at scale.
309 | - [Kubeflow Pipelines](https://github.com/kubeflow/pipelines) ![](https://img.shields.io/github/stars/kubeflow/pipelines.svg?style=social) - Machine Learning Pipelines for Kubeflow.
310 | - [LangFlow](https://github.com/logspace-ai/langflow) ![](https://img.shields.io/github/stars/logspace-ai/langflow.svg?style=social) - An effortless way to experiment and prototype LangChain flows with drag-and-drop components and a chat interface.
311 | - [Metaflow](https://github.com/Netflix/metaflow) ![](https://img.shields.io/github/stars/Netflix/metaflow.svg?style=social) - Build and manage real-life data science projects with ease!
312 | - [Ploomber](https://github.com/ploomber/ploomber) ![](https://img.shields.io/github/stars/ploomber/ploomber.svg?style=social) - The fastest way to build data pipelines. Develop iteratively, deploy anywhere.
313 | - [Prefect](https://github.com/PrefectHQ/prefect) ![](https://img.shields.io/github/stars/PrefectHQ/prefect.svg?style=social) - The easiest way to automate your data.
314 | - [VDP](https://github.com/instill-ai/vdp) ![](https://img.shields.io/github/stars/instill-ai/vdp.svg?style=social) - An open-source unstructured data ETL tool to streamline the end-to-end unstructured data processing pipeline.
315 | - [ZenML](https://github.com/zenml-io/zenml) ![](https://img.shields.io/github/stars/zenml-io/zenml.svg?style=social) - MLOps framework to create reproducible pipelines.
316 | 
317 | **[⬆ back to ToC](#table-of-contents)**
318 | 
319 | ## Scheduling
320 | 
321 | - [Kueue](https://github.com/kubernetes-sigs/kueue) ![](https://img.shields.io/github/stars/kubernetes-sigs/kueue.svg?style=social) - Kubernetes-native Job Queueing.
322 | - [PAI](https://github.com/microsoft/pai) ![](https://img.shields.io/github/stars/microsoft/pai.svg?style=social) - Resource scheduling and cluster management for AI (Open-sourced by Microsoft).
323 | - [Slurm](https://github.com/SchedMD/slurm) ![](https://img.shields.io/github/stars/SchedMD/slurm.svg?style=social) - A Highly Scalable Workload Manager.
324 | - [Volcano](https://github.com/volcano-sh/volcano) ![](https://img.shields.io/github/stars/volcano-sh/volcano.svg?style=social) - A Cloud Native Batch System (Project under CNCF).
325 | - [Yunikorn](https://github.com/apache/yunikorn-core) ![](https://img.shields.io/github/stars/apache/yunikorn-core.svg?style=social) - Light-weight, universal resource scheduler for container orchestrator systems.
326 | 
327 | **[⬆ back to ToC](#table-of-contents)**
328 | 
329 | ## Model Management
330 | 
331 | - [dvc](https://github.com/iterative/dvc) ![](https://img.shields.io/github/stars/iterative/dvc.svg?style=social) - Data Version Control | Git for Data & Models | ML Experiments Management
332 | - [ModelDB](https://github.com/VertaAI/modeldb) ![](https://img.shields.io/github/stars/VertaAI/modeldb.svg?style=social) - Open Source ML Model Versioning, Metadata, and Experiment Management
333 | - [MLEM](https://github.com/iterative/mlem) ![](https://img.shields.io/github/stars/iterative/mlem.svg?style=social) - A tool to package, serve, and deploy any ML model on any platform.
334 | - [ormb](https://github.com/kleveross/ormb) ![](https://img.shields.io/github/stars/kleveross/ormb.svg?style=social) - Docker for Your ML/DL Models Based on OCI Artifacts
335 | 
336 | **[⬆ back to ToC](#table-of-contents)**
337 | 
338 | # Performance
339 | 
340 | ## ML Compiler
341 | 
342 | - [ONNX-MLIR](https://github.com/onnx/onnx-mlir) ![](https://img.shields.io/github/stars/onnx/onnx-mlir.svg?style=social) - Compiler technology to transform a valid Open Neural Network Exchange (ONNX) graph into code that implements the graph with minimum runtime support.
343 | - [TVM](https://github.com/apache/tvm) ![](https://img.shields.io/github/stars/apache/tvm.svg?style=social) - Open deep learning compiler stack for cpu, gpu and specialized accelerators
344 | 
345 | **[⬆ back to ToC](#table-of-contents)**
346 | 
347 | ## Profiling
348 | 
349 | - [octoml-profile](https://github.com/octoml/octoml-profile) ![](https://img.shields.io/github/stars/octoml/octoml-profile.svg?style=social) - octoml-profile is a python library and cloud service designed to provide the simplest experience for assessing and optimizing the performance of PyTorch models on cloud hardware with state-of-the-art ML acceleration technology.
350 | - [scalene](https://github.com/plasma-umass/scalene) ![](https://img.shields.io/github/stars/plasma-umass/scalene.svg?style=social) - a high-performance, high-precision CPU, GPU, and memory profiler for Python
351 | 
352 | **[⬆ back to ToC](#table-of-contents)**
353 | 
354 | # AutoML
355 | 
356 | - [Archai](https://github.com/microsoft/archai) ![](https://img.shields.io/github/stars/microsoft/archai.svg?style=social) - a platform for Neural Network Search (NAS) that allows you to generate efficient deep networks for your applications.
357 | - [autoai](https://github.com/blobcity/autoai) ![](https://img.shields.io/github/stars/blobcity/autoai.svg?style=social) - A framework to find the best performing AI/ML model for any AI problem.
358 | - [AutoGL](https://github.com/THUMNLab/AutoGL) ![](https://img.shields.io/github/stars/THUMNLab/AutoGL.svg?style=social) - An autoML framework & toolkit for machine learning on graphs
359 | - [AutoGluon](https://github.com/awslabs/autogluon) ![](https://img.shields.io/github/stars/awslabs/autogluon.svg?style=social) - AutoML for Image, Text, and Tabular Data.
360 | - [automl-gs](https://github.com/minimaxir/automl-gs) ![](https://img.shields.io/github/stars/minimaxir/automl-gs.svg?style=social) - Provide an input CSV and a target field to predict, generate a model + code to run it.
361 | - [autokeras](https://github.com/keras-team/autokeras) ![](https://img.shields.io/github/stars/keras-team/autokeras.svg?style=social) - AutoML library for deep learning.
362 | - [Auto-PyTorch](https://github.com/automl/Auto-PyTorch) ![](https://img.shields.io/github/stars/automl/Auto-PyTorch.svg?style=social) - Automatic architecture search and hyperparameter optimization for PyTorch.
363 | - [auto-sklearn](https://github.com/automl/auto-sklearn) ![](https://img.shields.io/github/stars/automl/auto-sklearn.svg?style=social) - an automated machine learning toolkit and a drop-in replacement for a scikit-learn estimator.
364 | - [Dragonfly](https://github.com/dragonfly/dragonfly) ![](https://img.shields.io/github/stars/dragonfly/dragonfly.svg?style=social) - An open source python library for scalable Bayesian optimisation.
365 | - [Determined](https://github.com/determined-ai/determined) ![](https://img.shields.io/github/stars/determined-ai/determined.svg?style=social) - scalable deep learning training platform with integrated hyperparameter tuning support; includes Hyperband, PBT, and other search methods.
366 | - [DEvol (DeepEvolution)](https://github.com/joeddav/devol) ![](https://img.shields.io/github/stars/joeddav/devol.svg?style=social) - a basic proof of concept for genetic architecture search in Keras.
367 | - [EvalML](https://github.com/alteryx/evalml) ![](https://img.shields.io/github/stars/alteryx/evalml.svg?style=social) - An open source python library for AutoML.
368 | - [FEDOT](https://github.com/nccr-itmo/FEDOT) ![](https://img.shields.io/github/stars/nccr-itmo/FEDOT.svg?style=social) - AutoML framework for the design of composite pipelines.
369 | - [FLAML](https://github.com/microsoft/FLAML) ![](https://img.shields.io/github/stars/microsoft/FLAML.svg?style=social) - Fast and lightweight AutoML ([paper](https://www.microsoft.com/en-us/research/publication/flaml-a-fast-and-lightweight-automl-library/)).
370 | - [Goptuna](https://github.com/c-bata/goptuna) ![](https://img.shields.io/github/stars/c-bata/goptuna.svg?style=social) - A hyperparameter optimization framework, inspired by Optuna.
371 | - [HpBandSter](https://github.com/automl/HpBandSter) ![](https://img.shields.io/github/stars/automl/HpBandSter.svg?style=social) - a framework for distributed hyperparameter optimization.
372 | - [HPOlib2](https://github.com/automl/HPOlib2) ![](https://img.shields.io/github/stars/automl/HPOlib2.svg?style=social) - a library for hyperparameter optimization and black box optimization benchmarks.
373 | - [Hyperband](https://github.com/zygmuntz/hyperband) ![](https://img.shields.io/github/stars/zygmuntz/hyperband.svg?style=social) - open source code for tuning hyperparams with Hyperband.
374 | - [Hypernets](https://github.com/DataCanvasIO/Hypernets) ![](https://img.shields.io/github/stars/DataCanvasIO/Hypernets.svg?style=social) - A General Automated Machine Learning Framework.
375 | - [Hyperopt](https://github.com/hyperopt/hyperopt) ![](https://img.shields.io/github/stars/hyperopt/hyperopt.svg?style=social) - Distributed Asynchronous Hyperparameter Optimization in Python.
376 | - [hypertunity](https://github.com/gdikov/hypertunity) ![](https://img.shields.io/github/stars/gdikov/hypertunity.svg?style=social) - A toolset for black-box hyperparameter optimisation.
377 | - [Katib](https://github.com/kubeflow/katib) ![](https://img.shields.io/github/stars/kubeflow/katib.svg?style=social) - Katib is a Kubernetes-native project for automated machine learning (AutoML).
378 | - [Keras Tuner](https://github.com/keras-team/keras-tuner) ![](https://img.shields.io/github/stars/keras-team/keras-tuner.svg?style=social) - Hyperparameter tuning for humans.
379 | - [learn2learn](https://github.com/learnables/learn2learn) ![](https://img.shields.io/github/stars/learnables/learn2learn.svg?style=social) - PyTorch Meta-learning Framework for Researchers.
380 | - [Ludwig](https://github.com/uber/ludwig) ![](https://img.shields.io/github/stars/uber/ludwig.svg?style=social) - a toolbox built on top of TensorFlow that allows to train and test deep learning models without the need to write code.
381 | - [MOE](https://github.com/Yelp/MOE) ![](https://img.shields.io/github/stars/Yelp/MOE.svg?style=social) - a global, black box optimization engine for real world metric optimization by Yelp.
382 | - [Model Search](https://github.com/google/model_search) ![](https://img.shields.io/github/stars/google/model_search.svg?style=social) - a framework that implements AutoML algorithms for model architecture search at scale.
383 | - [NASGym](https://github.com/gomerudo/nas-env) ![](https://img.shields.io/github/stars/gomerudo/nas-env.svg?style=social) - a proof-of-concept OpenAI Gym environment for Neural Architecture Search (NAS).
384 | - [NNI](https://github.com/Microsoft/nni) ![](https://img.shields.io/github/stars/Microsoft/nni.svg?style=social) - An open source AutoML toolkit for automating the machine learning lifecycle, including feature engineering, neural architecture search, model compression and hyper-parameter tuning.
385 | - [Optuna](https://github.com/optuna/optuna) ![](https://img.shields.io/github/stars/optuna/optuna.svg?style=social) - A hyperparameter optimization framework.
386 | - [Pycaret](https://github.com/pycaret/pycaret) ![](https://img.shields.io/github/stars/pycaret/pycaret.svg?style=social) - An open-source, low-code machine learning library in Python that automates machine learning workflows.
387 | - [Ray Tune](https://github.com/ray-project/ray) ![](https://img.shields.io/github/stars/ray-project/ray.svg?style=social) - Scalable Hyperparameter Tuning.
388 | - [REMBO](https://github.com/ziyuw/rembo) ![](https://img.shields.io/github/stars/ziyuw/rembo.svg?style=social) - Bayesian optimization in high-dimensions via random embedding.
389 | - [RoBO](https://github.com/automl/RoBO) ![](https://img.shields.io/github/stars/automl/RoBO.svg?style=social) - a Robust Bayesian Optimization framework.
390 | - [scikit-optimize(skopt)](https://github.com/scikit-optimize/scikit-optimize) ![](https://img.shields.io/github/stars/scikit-optimize/scikit-optimize.svg?style=social) - Sequential model-based optimization with a `scipy.optimize` interface.
391 | - [Spearmint](https://github.com/HIPS/Spearmint) ![](https://img.shields.io/github/stars/HIPS/Spearmint.svg?style=social) - a software package to perform Bayesian optimization.
392 | - [TPOT](http://automl.info/tpot/) ![](https://img.shields.io/github/stars/EpistasisLab/tpot.svg?style=social) - one of the very first AutoML methods and open-source software packages.
393 | - [Torchmeta](https://github.com/tristandeleu/pytorch-meta) ![](https://img.shields.io/github/stars/tristandeleu/pytorch-meta.svg?style=social) - A Meta-Learning library for PyTorch.
394 | - [Vega](https://github.com/huawei-noah/vega) ![](https://img.shields.io/github/stars/huawei-noah/vega.svg?style=social) - an AutoML algorithm tool chain by Huawei Noah's Ark Lab.
395 | 
396 | **[⬆ back to ToC](#table-of-contents)**
397 | 
398 | # Optimizations
399 | 
400 | - [FeatherCNN](https://github.com/Tencent/FeatherCNN) ![](https://img.shields.io/github/stars/Tencent/FeatherCNN.svg?style=social) - FeatherCNN is a high performance inference engine for convolutional neural networks.
401 | - [Forward](https://github.com/Tencent/Forward) ![](https://img.shields.io/github/stars/Tencent/Forward.svg?style=social) - A library for high performance deep learning inference on NVIDIA GPUs.
402 | - [NCNN](https://github.com/Tencent/ncnn) ![](https://img.shields.io/github/stars/Tencent/ncnn.svg?style=social) - ncnn is a high-performance neural network inference framework optimized for the mobile platform.
403 | - [PocketFlow](https://github.com/Tencent/PocketFlow) ![](https://img.shields.io/github/stars/Tencent/PocketFlow.svg?style=social) - use AutoML to do model compression.
404 | - [TensorFlow Model Optimization](https://github.com/tensorflow/model-optimization) ![](https://img.shields.io/github/stars/tensorflow/model-optimization.svg?style=social) - A suite of tools that users, both novice and advanced, can use to optimize machine learning models for deployment and execution.
405 | - [TNN](https://github.com/Tencent/TNN) ![](https://img.shields.io/github/stars/Tencent/TNN.svg?style=social) - A uniform deep learning inference framework for mobile, desktop and server.
406 | 
407 | **[⬆ back to ToC](#table-of-contents)**
408 | 
409 | # Federated ML
410 | 
411 | - [EasyFL](https://github.com/EasyFL-AI/EasyFL) ![](https://img.shields.io/github/stars/EasyFL-AI/EasyFL.svg?style=social) - An Easy-to-use Federated Learning Platform
412 | - [FATE](https://github.com/FederatedAI/FATE) ![](https://img.shields.io/github/stars/FederatedAI/FATE.svg?style=social) - An Industrial Grade Federated Learning Framework
413 | - [FedML](https://github.com/FedML-AI/FedML) ![](https://img.shields.io/github/stars/FedML-AI/FedML.svg?style=social) - The federated learning and analytics library enabling secure and collaborative machine learning on decentralized data anywhere at any scale. Supporting large-scale cross-silo federated learning, cross-device federated learning on smartphones/IoTs, and research simulation.
414 | - [Flower](https://github.com/adap/flower) ![](https://img.shields.io/github/stars/adap/flower.svg?style=social) - A Friendly Federated Learning Framework
415 | - [Harmonia](https://github.com/ailabstw/harmonia) ![](https://img.shields.io/github/stars/ailabstw/harmonia.svg?style=social) - Harmonia is an open-source project aiming at developing systems/infrastructures and libraries to ease the adoption of federated learning (abbreviated to FL) for researches and production usage.
416 | - [TensorFlow Federated](https://github.com/tensorflow/federated) ![](https://img.shields.io/github/stars/tensorflow/federated.svg?style=social) - A framework for implementing federated learning
417 | 
418 | **[⬆ back to ToC](#table-of-contents)**
419 | 
420 | # Awesome Lists
421 | 
422 | - [Awesome Argo](https://github.com/terrytangyuan/awesome-argo) ![](https://img.shields.io/github/stars/terrytangyuan/awesome-argo.svg?style=social) - A curated list of awesome projects and resources related to Argo
423 | - [Awesome AutoDL](https://github.com/D-X-Y/Awesome-AutoDL) ![](https://img.shields.io/github/stars/D-X-Y/Awesome-AutoDL.svg?style=social) - Automated Deep Learning: Neural Architecture Search Is Not the End (a curated list of AutoDL resources and an in-depth analysis)
424 | - [Awesome AutoML](https://github.com/windmaple/awesome-AutoML) ![](https://img.shields.io/github/stars/windmaple/awesome-AutoML.svg?style=social) - Curating a list of AutoML-related research, tools, projects and other resources
425 | - [Awesome AutoML Papers](https://github.com/hibayesian/awesome-automl-papers) ![](https://img.shields.io/github/stars/hibayesian/awesome-automl-papers.svg?style=social) - A curated list of automated machine learning papers, articles, tutorials, slides and projects
426 | - [Awesome Federated Learning Systems](https://github.com/AmberLJC/FLsystem-paper/blob/main/README.md) ![](https://img.shields.io/github/stars/AmberLJC/FLsystem-paper.svg?style=social) - A curated list of Federated Learning Systems related academic papers, articles, tutorials, slides and projects.
427 | - [Awesome Federated Learning](https://github.com/chaoyanghe/Awesome-Federated-Learning) ![](https://img.shields.io/github/stars/chaoyanghe/Awesome-Federated-Learning.svg?style=social) - A curated list of federated learning publications, re-organized from Arxiv (mostly)
428 | - [awesome-federated-learning](https://github.com/weimingwill/awesome-federated-learning) ![](https://img.shields.io/github/stars/weimingwill/awesome-federated-learning.svg?style=social) - All materials you need for Federated Learning: blogs, videos, papers, and softwares, etc.
429 | - [Awesome Open MLOps](https://github.com/fuzzylabs/awesome-open-mlops) ![](https://img.shields.io/github/stars/fuzzylabs/awesome-open-mlops.svg?style=social) - This is the Fuzzy Labs guide to the universe of free and open source MLOps tools.
430 | - [Awesome Production Machine Learning](https://github.com/EthicalML/awesome-production-machine-learning) ![](https://img.shields.io/github/stars/EthicalML/awesome-production-machine-learning.svg?style=social) - A curated list of awesome open source libraries to deploy, monitor, version and scale your machine learning
431 | - [Awesome Tensor Compilers](https://github.com/merrymercy/awesome-tensor-compilers) ![](https://img.shields.io/github/stars/merrymercy/awesome-tensor-compilers.svg?style=social) - A list of awesome compiler projects and papers for tensor computation and deep learning.
432 | - [kelvins/awesome-mlops](https://github.com/kelvins/awesome-mlops) ![](https://img.shields.io/github/stars/kelvins/awesome-mlops.svg?style=social) - A curated list of awesome MLOps tools.
433 | - [visenger/awesome-mlops](https://github.com/visenger/awesome-mlops) ![](https://img.shields.io/github/stars/visenger/awesome-mlops.svg?style=social) - An awesome list of references for MLOps - Machine Learning Operations
434 | - [currentslab/awesome-vector-search](https://github.com/currentslab/awesome-vector-search) ![](https://img.shields.io/github/stars/currentslab/awesome-vector-search.svg?style=social) - A curated list of awesome vector search framework/engine, library, cloud service and research papers to vector similarity search.
435 | 
436 | **[⬆ back to ToC](#table-of-contents)**
437 | 


--------------------------------------------------------------------------------
/contributing.md:
--------------------------------------------------------------------------------
 1 | # Contribution Guidelines
 2 | 
 3 | Please ensure your pull request adheres to the following guidelines:
 4 | 
 5 | - New categories or improvements to the existing categorization are welcome.
 6 | - Search previous suggestions before making a new one, as yours may be a duplicate.
 7 | - Make an individual pull request for each suggestion.
 8 |     - Run `./scripts/generate-star-badges.py` to generate GitHub star badges if needed.
 9 |     - Run `./scripts/github-markdown-toc ./README.md` to generate ToC if needed.
10 | - Order link titles alphabetically within each category.
11 | 
12 | Thank you for your suggestions!
13 | 


--------------------------------------------------------------------------------
/scripts/generate-star-badges.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import sys
 4 | import shutil
 5 | 
 6 | filename = "README.md"
 7 | filename_backup = "README.md.backup"
 8 | 
 9 | 
def is_link_line(line) -> bool:
    """Return True if ``line`` is a top-level Markdown list item starting with a link.

    Only lines that begin with the exact prefix ``- [`` qualify; indented list
    items and other bullet markers are not treated as link lines.
    """
    return line.startswith("- [")
15 | 
def is_github_project(line) -> bool:
    """Return True if the first Markdown link of ``line`` points to github.com.

    The check looks at the text right after the first ``]`` and requires it to
    be ``(https://github.com/``. A GitHub URL that only appears later in the
    line (for example in the description) does not count, because the badge
    generator derives the project handle from the first link.
    """
    text_end = line.find("]")
    return text_end != -1 and line.startswith("(https://github.com/", text_end + 1)
20 | 
def contains_star_badge(line) -> bool:
    """Return True if ``line`` already contains a shields.io GitHub star badge URL."""
    return "https://img.shields.io/github/stars" in line
25 | 
26 | 
def generate_badge_link(line) -> str:
    """Insert a GitHub star badge right after the first Markdown link of ``line``.

    The link is expected to look like ``](https://github.com/<owner>/<repo>...)``.
    Only ``<owner>/<repo>`` is used for the badge, so links that point deeper
    into a repository (``/blob/main/README.md``, trailing slash, ``#fragment``,
    ``?query``) still yield a valid badge URL.

    Args:
        line: One line of the README, with or without its trailing newline.

    Returns:
        The line with `` ![](<badge url>)`` inserted after the link. A single
        space is added after the badge when the following character is not
        whitespace. The line is returned unchanged when no well-formed GitHub
        link with an owner and a repository can be found.
    """
    link_prefix = "(https://github.com/"
    text_end = line.find("]")
    if text_end == -1 or not line.startswith(link_prefix, text_end + 1):
        return line
    url_end = line.find(")", text_end)
    if url_end == -1:
        return line

    # Reduce the URL path to "<owner>/<repo>".
    path = line[text_end + 1 + len(link_prefix):url_end]
    path = path.split("#", 1)[0].split("?", 1)[0]
    segments = [part for part in path.split("/") if part]
    if len(segments) < 2:
        return line
    project = "/".join(segments[:2])
    print("The project handle of this line is " + project)

    badge_link = " ![](https://img.shields.io/github/stars/" + project + ".svg?style=social)"
    rest = line[url_end + 1:]
    # Separate the badge from following text, but do not create trailing
    # whitespace at the end of the line.
    if rest and not rest[0].isspace():
        badge_link += " "
    newline = line[:url_end + 1] + badge_link + rest
    print("The new line is " + newline)
    return newline
39 | 
40 | 
def generate_star_badge(line) -> str:
    """Return ``line`` with a GitHub star badge added when one is missing.

    Lines that are not link lines, do not reference GitHub, or already carry a
    star badge are returned unchanged.
    """
    needs_badge = (
        is_link_line(line)
        and is_github_project(line)
        and not contains_star_badge(line)
    )
    if not needs_badge:
        return line
    print("This line does not contain the star badge: " + line)
    return generate_badge_link(line)
50 | 
51 | 
def main() -> int:
    """Add missing star badges to the GitHub link lines of ``filename``.

    Every line of the file is passed through ``generate_star_badge``. The
    untouched original is then copied to ``filename_backup`` and the processed
    lines are written back to ``filename``.

    Returns:
        0, which the caller uses as the process exit status.
    """
    # The README contains non-ASCII characters, so do not depend on the
    # platform's default encoding.
    with open(filename, "r", encoding="utf-8") as f:
        lines = [generate_star_badge(line) for line in f]
    shutil.copyfile(filename, filename_backup)
    with open(filename, "w", encoding="utf-8") as f:
        f.writelines(lines)
    return 0
63 | 
64 | 
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    sys.exit(main())
67 | 


--------------------------------------------------------------------------------
/scripts/github-markdown-toc:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env bash
  2 | 
  3 | #
  4 | # Steps:
  5 | #
  6 | #  1. Download corresponding html file for some README.md:
  7 | #       curl -s $1
  8 | #
  9 | #  2. Discard rows that do not contain the substring 'user-content-' (GitHub's markup):
 10 | #       awk '/user-content-/ { ...
 11 | #
 12 | #  3.1 Get the last character of each row, e.g. the '1' in ' ... </span></a>sitemap.js</h1'.
 13 | #      It is the level of the current header:
 14 | #       substr($0, length($0), 1)
 15 | #
 16 | #  3.2 Get level from 3.1 and insert corresponding number of spaces before '*':
 17 | #       sprintf("%*s", (level-1)*'"$nb_spaces"', "")
 18 | #
 19 | #  4. Find head's text and insert it inside "* [ ... ]":
 20 | #       substr($0, match($0, /a>.*<\/h/)+2, RLENGTH-5)
 21 | #
 22 | #  5. Find anchor and insert it inside "(...)":
 23 | #       substr($0, match($0, "href=\"[^\"]+?\" ")+6, RLENGTH-8)
 24 | #
 25 | 
 26 | gh_toc_version="0.8.0"
 27 | 
 28 | gh_user_agent="gh-md-toc v$gh_toc_version"
 29 | 
 30 | #
 31 | # Download a README.md, already rendered into HTML, by its URL.
 32 | #
 33 | #
 34 | gh_toc_load() {
 35 |     local gh_url=$1
 36 | 
 37 |     if type curl &>/dev/null; then
 38 |         curl --user-agent "$gh_user_agent" -s "$gh_url"
 39 |     elif type wget &>/dev/null; then
 40 |         wget --user-agent="$gh_user_agent" -qO- "$gh_url"
 41 |     else
 42 |         echo "Please, install 'curl' or 'wget' and try again."
 43 |         exit 1
 44 |     fi
 45 | }
 46 | 
 47 | #
 48 | # Converts local md file into html by GitHub
 49 | #
 50 | # -> curl -X POST --data '{"text": "Hello world github/linguist#1 **cool**, and #1!"}' https://api.github.com/markdown
 51 | # <p>Hello world github/linguist#1 <strong>cool</strong>, and #1!</p>'"
 52 | gh_toc_md2html() {
 53 |     local gh_file_md=$1
 54 |     local skip_header=$2
 55 | 
 56 |     URL=https://api.github.com/markdown/raw
 57 | 
 58 |     if [ ! -z "$GH_TOC_TOKEN" ]; then
 59 |         TOKEN=$GH_TOC_TOKEN
 60 |     else
 61 |         TOKEN_FILE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/token.txt"
 62 |         if [ -f "$TOKEN_FILE" ]; then
 63 |             TOKEN="$(cat $TOKEN_FILE)"
 64 |         fi
 65 |     fi
 66 |     if [ ! -z "${TOKEN}" ]; then
 67 |         AUTHORIZATION="Authorization: token ${TOKEN}"
 68 |     fi
 69 | 
 70 |     local gh_tmp_file_md=$gh_file_md
 71 |     if [ "$skip_header" = "yes" ]; then
 72 |         if grep -Fxq "<!--te-->" $gh_src; then
 73 |           # cut everything before the toc
 74 |           gh_tmp_file_md=$gh_file_md~~
 75 |           sed '1,/<!--te-->/d' $gh_file_md > $gh_tmp_file_md
 76 |         fi
 77 |     fi
 78 | 
 79 |     # echo $URL 1>&2
 80 |     OUTPUT=$(curl -s \
 81 |         --user-agent "$gh_user_agent" \
 82 |         --data-binary @"$gh_tmp_file_md" \
 83 |         -H "Content-Type:text/plain" \
 84 |         -H "$AUTHORIZATION" \
 85 |         "$URL")
 86 | 
 87 |     rm -f $gh_file_md~~
 88 | 
 89 |     if [ "$?" != "0" ]; then
 90 |         echo "XXNetworkErrorXX"
 91 |     fi
 92 |     if [ "$(echo "${OUTPUT}" | awk '/API rate limit exceeded/')" != "" ]; then
 93 |         echo "XXRateLimitXX"
 94 |     else
 95 |         echo "${OUTPUT}"
 96 |     fi
 97 | }
 98 | 
 99 | 
100 | #
101 | # Checks whether the passed string is a URL
102 | #
gh_is_url() {
    # Print "yes" when $1 starts with "http://" or "https://", otherwise "no".
    # The scheme separator is required so that local files whose names merely
    # start with "http" (e.g. "httpd.md") are not mistaken for URLs.
    case "$1" in
        http://* | https://*)
            echo "yes" ;;
        *)
            echo "no" ;;
    esac
}
111 | 
112 | #
113 | # TOC generator
114 | #
gh_toc(){
    # Generate the TOC for one document and print it; optionally insert it
    # into the local source file between the <!--ts--> and <!--te--> lines.
    #
    # Arguments: $1 - URL or local path of the markdown document
    #            $2 - total number of documents being processed
    #            $3 - "yes" to insert the TOC into the local file
    #            $4 - "yes" to remove the backup and the separate TOC file
    #            $5 - "yes" to omit the "Created by / Added by" footer
    #            $6 - number of spaces per indent level
    #            $7 - "yes" to skip everything up to the <!--te--> line
    # Outputs:   the TOC and status messages on stdout
    # Returns:   exits with 1 on missing input, download/API errors or
    #            missing markers
    local gh_src=$1
    local gh_src_copy=$1
    local gh_ttl_docs=$2
    local need_replace=$3
    local no_backup=$4
    local no_footer=$5
    local indent=$6
    local skip_header=$7

    if [ "$gh_src" = "" ]; then
        echo "Please, enter URL or local path for a README.md"
        exit 1
    fi

    # Show "TOC" string only if working with one document
    if [ "$gh_ttl_docs" = "1" ]; then
        echo "Table of Contents"
        echo "================="
        echo ""
        gh_src_copy=""
    fi

    if [ "$(gh_is_url "$gh_src")" == "yes" ]; then
        gh_toc_load "$gh_src" | gh_toc_grab "$gh_src_copy" "$indent"
        if [ "${PIPESTATUS[0]}" != "0" ]; then
            echo "Could not load remote document."
            echo "Please check your url or network connectivity"
            exit 1
        fi
        if [ "$need_replace" = "yes" ]; then
            echo
            echo "!! '$gh_src' is not a local file"
            echo "!! Can't insert the TOC into it."
            echo
        fi
    else
        local rawhtml
        rawhtml=$(gh_toc_md2html "$gh_src" "$skip_header")
        if [ "$rawhtml" == "XXNetworkErrorXX" ]; then
            echo "Parsing local markdown file requires access to github API"
            echo "Please make sure curl is installed and check your network connectivity"
            exit 1
        fi
        if [ "$rawhtml" == "XXRateLimitXX" ]; then
            echo "Parsing local markdown file requires access to github API"
            echo "Error: You exceeded the hourly limit. See: https://developer.github.com/v3/#rate-limiting"
            TOKEN_FILE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/token.txt"
            echo "or place GitHub auth token here: ${TOKEN_FILE}"
            exit 1
        fi
        local toc
        toc=$(echo "$rawhtml" | gh_toc_grab "$gh_src_copy" "$indent")
        echo "$toc"
        if [ "$need_replace" = "yes" ]; then
            # The path is quoted so that file names with spaces work.
            if grep -Fxq "<!--ts-->" "$gh_src" && grep -Fxq "<!--te-->" "$gh_src"; then
                echo "Found markers"
            else
                echo "You don't have <!--ts--> or <!--te--> in your file...exiting"
                exit 1
            fi
            local ts="<\!--ts-->"
            local te="<\!--te-->"
            local dt
            dt=$(date +'%F_%H%M%S')
            local ext=".orig.${dt}"
            local toc_path="${gh_src}.toc.${dt}"
            local toc_createdby="<!-- Created by https://github.com/ekalinin/github-markdown-toc -->"
            local toc_footer
            toc_footer="<!-- Added by: $(whoami), at: $(date) -->"
            # http://fahdshariff.blogspot.ru/2012/12/sed-mutli-line-replacement-between-two.html
            # clear old TOC (the original file is kept as "<file><ext>")
            sed -i"${ext}" "/${ts}/,/${te}/{//!d;}" "$gh_src"
            # create toc file
            echo "${toc}" > "${toc_path}"
            if [ "${no_footer}" != "yes" ]; then
                echo -e "\n${toc_createdby}\n${toc_footer}\n" >> "$toc_path"
            fi

            # insert toc file; a sed without --version is taken to be BSD sed,
            # which needs an explicit (empty) backup suffix after -i
            if ! sed --version > /dev/null 2>&1; then
                sed -i "" "/${ts}/r ${toc_path}" "$gh_src"
            else
                sed -i "/${ts}/r ${toc_path}" "$gh_src"
            fi
            echo
            if [ "${no_backup}" = "yes" ]; then
                rm -- "${toc_path}" "${gh_src}${ext}"
            fi
            echo "!! TOC was added into: '$gh_src'"
            if [ -z "${no_backup}" ]; then
                echo "!! Origin version of the file: '${gh_src}${ext}'"
                echo "!! TOC added into a separate file: '${toc_path}'"
            fi
            echo
        fi
    fi
}
212 | 
213 | #
214 | # Grabber of the TOC from rendered html
215 | #
216 | # $1 - the source URL of the document.
217 | #      It is needed if the TOC is generated for multiple documents.
218 | # $2 - number of spaces used to indent.
219 | #
gh_toc_grab() {
    # Read rendered HTML on stdin and print one markdown TOC entry per
    # heading that carries a "user-content-" anchor.
    #
    # Arguments: $1 - prefix put in front of every anchor (may be empty)
    #            $2 - number of spaces per indent level (default: 3)
    # Outputs:   lines of the form "<indent>* [text](<$1><anchor>)"
    local gh_url=$1
    local indent=${2:-3}

    local -a grep_cmd
    local decode_escapes
    local href_match
    if [ "$(uname -s)" == "OS/390" ]; then
        grep_cmd=(pcregrep -o)
        decode_escapes="no"
        href_match='"href=\"([^\"]+)?\""'
    else
        grep_cmd=(grep -Eo)
        decode_escapes="yes"
        href_match='"href=\"[^\"]+?\""'
    fi

    # awk program, run once per heading row:
    #   * level - last character of the row ("</hN" -> N)
    #   * text  - heading text between "a>" and "</h"
    #   * href  - value of the href attribute; "+" becomes a space and "%"
    #             becomes "\x" so that "echo -e" below can turn %XX into bytes
    local awkscript
    awkscript='{
        level = substr($0, length($0), 1)
        text = substr($0, match($0, /a>.*<\/h/)+2, RLENGTH-5)
        href = substr($0, match($0, '"$href_match"')+6, RLENGTH-7)
        modified_href = ""
        split(href, chars, "")
        for (i = 1; i <= length(href); i++) {
            c = chars[i]
            if (c == "+") {
                res = " "
            } else if (c == "%") {
                res = "\\x"
            } else {
                res = c ""
            }
            modified_href = modified_href res
        }
        print sprintf("%*s", (level-1)*indent, "") "* [" text "](" gh_url modified_href ")"
    }'

    local toc
    toc=$(
        # if closed <h[1-6]> is on the new line, then move it on the prev line
        # for example:
        #   was: The command <code>foo1</code>
        #        </h1>
        #   became: The command <code>foo1</code></h1>
        sed -e ':a' -e 'N' -e '$!ba' -e 's/\n<\/h/<\/h/g' |

        # find strings that corresponds to template
        "${grep_cmd[@]}" '<a.*id="user-content-[^"]*".*</h[1-6]' |

        # remove code tags and g-emoji
        sed -e 's/<code>//g' -e 's/<\/code>//g' \
            -e 's/<g-emoji[^>]*[^<]*<\/g-emoji> //g' |

        # now all rows are like:
        #   <a id="user-content-..." href="..."><span ...></span></a> ... </h1
        awk -v "gh_url=$gh_url" -v "indent=$indent" "$awkscript"
    )

    if [ "$decode_escapes" = "yes" ]; then
        echo -e "$toc"
    else
        echo "$toc"
    fi
}
284 | 
285 |         # perl -lpE 's/(\[[^\]]*\]\()(.*?)(\))/my ($pre, $in, $post)=($1, $2, $3) ; $in =~ s{\+}{ }g; $in =~ s{%}{\\x}g; $pre.$in.$post/ems')"
286 | 
287 | #
288 | # Returns filename only from full path or url
289 | #
gh_toc_get_filename() {
    # Print the last path component of $1 (everything after the final "/").
    # printf is used so that names such as "-n" are not taken as echo options.
    printf '%s\n' "${1##*/}"
}
293 | 
show_version() {
    # Print the script version followed by information about the system,
    # the shell and the external tools the script uses.
    #
    # Globals: gh_toc_version (read), SHELL (read), BASH (read)
    # Outputs: version report on stdout
    local tool

    echo "$gh_toc_version"
    echo
    echo "os:     $(uname -s)"
    echo "arch:   $(uname -m)"
    echo "kernel: $(uname -r)"
    # Fall back to the running bash when $SHELL is not set.
    echo "shell:  $("${SHELL:-$BASH}" --version)"
    echo
    for tool in curl wget grep awk sed; do
        printf "%-5s: " "$tool"
        if command -v "$tool" >/dev/null 2>&1; then
            printf '%s\n' "$("$tool" --version | head -n 1)"
        else
            echo "not installed"
        fi
    done
}
311 | 
show_help() {
    # Print the usage text.
    #
    # Globals: gh_toc_version (read)
    # Outputs: usage text on stdout
    #
    # The name is derived with a parameter expansion instead of basename,
    # which rejects values of $0 that start with "-" (e.g. "-bash").
    local app_name=${0##*/}
    cat <<EOF
GitHub TOC generator ($app_name): $gh_toc_version

Usage:
  $app_name [options] src [src]   Create TOC for a README file (url or local path)
  $app_name -                     Create TOC for markdown from STDIN
  $app_name --help                Show help
  $app_name --version             Show version

Options:
  --indent <NUM>      Set indent size. Default: 3.
  --insert            Insert new TOC into original file. For local files only. Default: false.
                      See https://github.com/ekalinin/github-markdown-toc/issues/41 for details.
  --no-backup         Remove backup file. Set --insert as well. Default: false.
  --hide-footer       Do not write date & author of the last TOC update. Set --insert as well. Default: false.
  --skip-header       Hide entry of the topmost headlines. Default: false.
                      See https://github.com/ekalinin/github-markdown-toc/issues/125 for details.

EOF
}
332 | 
333 | #
334 | # Options handlers
335 | #
gh_toc_app() {
    # Parse the command line and generate a TOC for every given document.
    #
    # Options may be given in any order but must precede the documents:
    #   --help, --version, --indent <NUM>, --insert, --no-backup,
    #   --hide-footer, --skip-header
    # A first non-option argument of "-" reads markdown from STDIN.
    #
    # Arguments: "$@" - the script's command line
    # Outputs:   TOC(s) and the "Created by" footer on stdout
    local need_replace="no"
    local no_backup=""
    local no_footer=""
    local skip_header=""
    local indent=3

    if [ $# -eq 0 ]; then
        show_help
        return
    fi

    while [ $# -gt 0 ]; do
        case "$1" in
            --help)
                show_help
                return
                ;;
            --version)
                show_version
                return
                ;;
            --indent)
                if [ $# -lt 2 ]; then
                    echo "Option --indent requires a value." >&2
                    return 1
                fi
                indent="$2"
                shift 2
                ;;
            --insert)
                need_replace="yes"
                shift
                ;;
            --no-backup)
                need_replace="yes"
                no_backup="yes"
                shift
                ;;
            --hide-footer)
                need_replace="yes"
                no_footer="yes"
                shift
                ;;
            --skip-header)
                skip_header="yes"
                shift
                ;;
            *)
                # First document (or "-"): stop option parsing.
                break
                ;;
        esac
    done

    if [ "${1:-}" = "-" ]; then
        local tmp_dir="${TMPDIR:-/tmp}"
        if [ ! -d "$tmp_dir" ]; then
            mkdir -p "$tmp_dir"
        fi
        local gh_tmp_md
        if [ "$(uname -s)" == "OS/390" ]; then
            gh_tmp_md="$tmp_dir/tmp.$(date +%m%d%Y%H%M%S)"
        else
            gh_tmp_md=$(mktemp "$tmp_dir/tmp.XXXXXX") || return 1
        fi
        # Copy STDIN verbatim: leading whitespace and backslashes are
        # significant in markdown (e.g. indented code blocks).
        cat > "$gh_tmp_md"
        gh_toc_md2html "$gh_tmp_md" | gh_toc_grab "" "$indent"
        local rc=$?
        rm -f -- "$gh_tmp_md"
        return "$rc"
    fi

    local md
    for md in "$@"; do
        echo ""
        gh_toc "$md" "$#" "$need_replace" "$no_backup" "$no_footer" "$indent" "$skip_header"
    done

    echo ""
    echo "<!-- Created by https://github.com/ekalinin/github-markdown-toc -->"
}
407 | 
408 | #
409 | # Entry point
410 | #
411 | gh_toc_app "$@"
412 | 


--------------------------------------------------------------------------------