├── .DS_Store ├── .gitignore ├── .pre-commit-config.yaml ├── .python-version ├── .readthedocs.yaml ├── API_REFERENCE.md ├── LICENSE ├── README.md ├── docs ├── .DS_Store ├── GalacticDataset.rst ├── Makefile ├── classifiers.rst ├── clustering.rst ├── conf.py ├── conversations.rst ├── embedding.rst ├── filters.rst ├── index.rst ├── installation.rst ├── loaders.rst ├── make.bat ├── minhash.rst ├── quickstart.rst ├── requirements.txt ├── taggers.rst ├── transforms.rst └── visualize.rst ├── examples ├── clustering.ipynb ├── example.ipynb ├── hermes.ipynb └── rlhf-data.ipynb ├── galactic.jpeg ├── graph.png ├── plot.png ├── pyproject.toml ├── requirements.txt ├── setup.py ├── src ├── .DS_Store └── galactic │ ├── __init__.py │ ├── async_openai.py │ ├── augment.py │ ├── base.py │ ├── classifiers.py │ ├── clustering.py │ ├── conversations.py │ ├── embedding.py │ ├── embedding_backends │ ├── __init__.py │ ├── base.py │ ├── ctranslate2_backend.py │ ├── modal_backend.py │ ├── modal_remote.py │ ├── onnx_backend.py │ ├── openai_backend.py │ └── replicate_backend.py │ ├── extract_doc.py │ ├── filters.py │ ├── galactic.py │ ├── kenlm.py │ ├── loaders.py │ ├── logger.py │ ├── minhash_lsh.py │ ├── scraping.py │ ├── taggers.py │ ├── transforms.py │ ├── utils.py │ └── visualize.py └── tests ├── README.md ├── test_count_tokens ├── test_filter_string ├── test_init ├── test_load_file_formats └── test_save /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/.DS_Store -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/.gitignore -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/.pre-commit-config.yaml -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- 1 | nlp 2 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/.readthedocs.yaml -------------------------------------------------------------------------------- /API_REFERENCE.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/API_REFERENCE.md -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/README.md -------------------------------------------------------------------------------- /docs/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/docs/.DS_Store -------------------------------------------------------------------------------- /docs/GalacticDataset.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/docs/GalacticDataset.rst -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/docs/Makefile -------------------------------------------------------------------------------- /docs/classifiers.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/docs/classifiers.rst -------------------------------------------------------------------------------- /docs/clustering.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/docs/clustering.rst -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/docs/conf.py -------------------------------------------------------------------------------- /docs/conversations.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/docs/conversations.rst -------------------------------------------------------------------------------- /docs/embedding.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/docs/embedding.rst -------------------------------------------------------------------------------- /docs/filters.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/docs/filters.rst -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/docs/index.rst -------------------------------------------------------------------------------- /docs/installation.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/docs/installation.rst -------------------------------------------------------------------------------- /docs/loaders.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/docs/loaders.rst -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/docs/make.bat -------------------------------------------------------------------------------- /docs/minhash.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/docs/minhash.rst -------------------------------------------------------------------------------- /docs/quickstart.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/docs/quickstart.rst -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/docs/requirements.txt -------------------------------------------------------------------------------- /docs/taggers.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/docs/taggers.rst -------------------------------------------------------------------------------- /docs/transforms.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/docs/transforms.rst -------------------------------------------------------------------------------- /docs/visualize.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/docs/visualize.rst -------------------------------------------------------------------------------- /examples/clustering.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/examples/clustering.ipynb -------------------------------------------------------------------------------- /examples/example.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/examples/example.ipynb -------------------------------------------------------------------------------- /examples/hermes.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/examples/hermes.ipynb -------------------------------------------------------------------------------- /examples/rlhf-data.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/examples/rlhf-data.ipynb -------------------------------------------------------------------------------- /galactic.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/galactic.jpeg -------------------------------------------------------------------------------- /graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/graph.png -------------------------------------------------------------------------------- /plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/plot.png -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/pyproject.toml -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/requirements.txt -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/setup.py -------------------------------------------------------------------------------- /src/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/src/.DS_Store -------------------------------------------------------------------------------- /src/galactic/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/src/galactic/__init__.py -------------------------------------------------------------------------------- /src/galactic/async_openai.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/src/galactic/async_openai.py -------------------------------------------------------------------------------- /src/galactic/augment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/src/galactic/augment.py -------------------------------------------------------------------------------- /src/galactic/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/src/galactic/base.py -------------------------------------------------------------------------------- /src/galactic/classifiers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/src/galactic/classifiers.py -------------------------------------------------------------------------------- /src/galactic/clustering.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/src/galactic/clustering.py -------------------------------------------------------------------------------- /src/galactic/conversations.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/src/galactic/conversations.py -------------------------------------------------------------------------------- /src/galactic/embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/src/galactic/embedding.py -------------------------------------------------------------------------------- /src/galactic/embedding_backends/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/galactic/embedding_backends/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/src/galactic/embedding_backends/base.py -------------------------------------------------------------------------------- /src/galactic/embedding_backends/ctranslate2_backend.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/src/galactic/embedding_backends/ctranslate2_backend.py -------------------------------------------------------------------------------- /src/galactic/embedding_backends/modal_backend.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/src/galactic/embedding_backends/modal_backend.py -------------------------------------------------------------------------------- /src/galactic/embedding_backends/modal_remote.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/src/galactic/embedding_backends/modal_remote.py -------------------------------------------------------------------------------- /src/galactic/embedding_backends/onnx_backend.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/src/galactic/embedding_backends/onnx_backend.py -------------------------------------------------------------------------------- /src/galactic/embedding_backends/openai_backend.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/src/galactic/embedding_backends/openai_backend.py -------------------------------------------------------------------------------- /src/galactic/embedding_backends/replicate_backend.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/src/galactic/embedding_backends/replicate_backend.py -------------------------------------------------------------------------------- /src/galactic/extract_doc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/src/galactic/extract_doc.py -------------------------------------------------------------------------------- /src/galactic/filters.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/src/galactic/filters.py -------------------------------------------------------------------------------- /src/galactic/galactic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/src/galactic/galactic.py -------------------------------------------------------------------------------- /src/galactic/kenlm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/src/galactic/kenlm.py -------------------------------------------------------------------------------- /src/galactic/loaders.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/src/galactic/loaders.py -------------------------------------------------------------------------------- /src/galactic/logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/src/galactic/logger.py -------------------------------------------------------------------------------- /src/galactic/minhash_lsh.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/src/galactic/minhash_lsh.py -------------------------------------------------------------------------------- /src/galactic/scraping.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/src/galactic/scraping.py -------------------------------------------------------------------------------- /src/galactic/taggers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/src/galactic/taggers.py -------------------------------------------------------------------------------- /src/galactic/transforms.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/src/galactic/transforms.py -------------------------------------------------------------------------------- /src/galactic/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/src/galactic/utils.py -------------------------------------------------------------------------------- /src/galactic/visualize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/src/galactic/visualize.py -------------------------------------------------------------------------------- /tests/README.md: -------------------------------------------------------------------------------- 1 | # haha just kidding. but soon! -------------------------------------------------------------------------------- /tests/test_count_tokens: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/tests/test_count_tokens -------------------------------------------------------------------------------- /tests/test_filter_string: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/tests/test_filter_string -------------------------------------------------------------------------------- /tests/test_init: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/tests/test_init -------------------------------------------------------------------------------- /tests/test_load_file_formats: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/tests/test_load_file_formats -------------------------------------------------------------------------------- /tests/test_save: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/taylorai/galactic/HEAD/tests/test_save --------------------------------------------------------------------------------