├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.yml │ ├── feature_request.yml │ ├── mtebzh_dataset.yml │ └── mtebzh_model.yml └── workflows │ ├── cd.yml │ └── ci.yml ├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── docs └── imgs │ └── medi-help.png ├── examples ├── example_data │ └── riddle.jsonl ├── finetune.ipynb ├── finetune_hf_dataset.py └── finetune_jsonl.py ├── mteb-zh ├── generate_report.py ├── mteb_zh │ ├── __init__.py │ ├── models.py │ └── tasks.py ├── readme.md ├── requirements.txt └── run_mteb_zh.py ├── poetry.lock ├── pyproject.toml ├── scripts ├── evalutate_mteb.py ├── process_zh_datasets.py ├── train_m3e.py └── train_medi.py ├── tests ├── __init__.py ├── conftest.py ├── fixtures │ ├── mini_medi.json │ └── model │ │ ├── config.json │ │ ├── pytorch_model.bin │ │ ├── special_tokens_map.json │ │ ├── tokenizer.json │ │ ├── tokenizer_config.json │ │ └── vocab.txt ├── test_data.py ├── test_data_structures.py ├── test_finetuner.py ├── test_loss.py ├── test_model.py └── test_utils.py └── uniem ├── __init__.py ├── criteria.py ├── data.py ├── data_structures.py ├── finetuner.py ├── integration ├── __init__.py └── sentence_transformers_wrapper.py ├── model.py ├── trainer.py ├── training_strategy.py ├── types.py ├── utils.py └── version.py /.github/ISSUE_TEMPLATE/bug_report.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyuxinwhy/uniem/HEAD/.github/ISSUE_TEMPLATE/bug_report.yml -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyuxinwhy/uniem/HEAD/.github/ISSUE_TEMPLATE/feature_request.yml -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/mtebzh_dataset.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyuxinwhy/uniem/HEAD/.github/ISSUE_TEMPLATE/mtebzh_dataset.yml -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/mtebzh_model.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyuxinwhy/uniem/HEAD/.github/ISSUE_TEMPLATE/mtebzh_model.yml -------------------------------------------------------------------------------- /.github/workflows/cd.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyuxinwhy/uniem/HEAD/.github/workflows/cd.yml -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyuxinwhy/uniem/HEAD/.github/workflows/ci.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyuxinwhy/uniem/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyuxinwhy/uniem/HEAD/LICENSE -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyuxinwhy/uniem/HEAD/Makefile -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyuxinwhy/uniem/HEAD/README.md -------------------------------------------------------------------------------- /docs/imgs/medi-help.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyuxinwhy/uniem/HEAD/docs/imgs/medi-help.png -------------------------------------------------------------------------------- /examples/example_data/riddle.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyuxinwhy/uniem/HEAD/examples/example_data/riddle.jsonl -------------------------------------------------------------------------------- /examples/finetune.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyuxinwhy/uniem/HEAD/examples/finetune.ipynb -------------------------------------------------------------------------------- /examples/finetune_hf_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyuxinwhy/uniem/HEAD/examples/finetune_hf_dataset.py -------------------------------------------------------------------------------- /examples/finetune_jsonl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyuxinwhy/uniem/HEAD/examples/finetune_jsonl.py -------------------------------------------------------------------------------- /mteb-zh/generate_report.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyuxinwhy/uniem/HEAD/mteb-zh/generate_report.py -------------------------------------------------------------------------------- /mteb-zh/mteb_zh/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mteb-zh/mteb_zh/models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyuxinwhy/uniem/HEAD/mteb-zh/mteb_zh/models.py -------------------------------------------------------------------------------- /mteb-zh/mteb_zh/tasks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyuxinwhy/uniem/HEAD/mteb-zh/mteb_zh/tasks.py -------------------------------------------------------------------------------- /mteb-zh/readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyuxinwhy/uniem/HEAD/mteb-zh/readme.md -------------------------------------------------------------------------------- /mteb-zh/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyuxinwhy/uniem/HEAD/mteb-zh/requirements.txt -------------------------------------------------------------------------------- /mteb-zh/run_mteb_zh.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyuxinwhy/uniem/HEAD/mteb-zh/run_mteb_zh.py -------------------------------------------------------------------------------- /poetry.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyuxinwhy/uniem/HEAD/poetry.lock -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyuxinwhy/uniem/HEAD/pyproject.toml -------------------------------------------------------------------------------- /scripts/evalutate_mteb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyuxinwhy/uniem/HEAD/scripts/evalutate_mteb.py -------------------------------------------------------------------------------- /scripts/process_zh_datasets.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyuxinwhy/uniem/HEAD/scripts/process_zh_datasets.py -------------------------------------------------------------------------------- /scripts/train_m3e.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyuxinwhy/uniem/HEAD/scripts/train_m3e.py -------------------------------------------------------------------------------- /scripts/train_medi.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyuxinwhy/uniem/HEAD/scripts/train_medi.py -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyuxinwhy/uniem/HEAD/tests/__init__.py -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyuxinwhy/uniem/HEAD/tests/conftest.py -------------------------------------------------------------------------------- /tests/fixtures/mini_medi.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyuxinwhy/uniem/HEAD/tests/fixtures/mini_medi.json -------------------------------------------------------------------------------- /tests/fixtures/model/config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyuxinwhy/uniem/HEAD/tests/fixtures/model/config.json -------------------------------------------------------------------------------- /tests/fixtures/model/pytorch_model.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyuxinwhy/uniem/HEAD/tests/fixtures/model/pytorch_model.bin -------------------------------------------------------------------------------- /tests/fixtures/model/special_tokens_map.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyuxinwhy/uniem/HEAD/tests/fixtures/model/special_tokens_map.json -------------------------------------------------------------------------------- /tests/fixtures/model/tokenizer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyuxinwhy/uniem/HEAD/tests/fixtures/model/tokenizer.json -------------------------------------------------------------------------------- /tests/fixtures/model/tokenizer_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyuxinwhy/uniem/HEAD/tests/fixtures/model/tokenizer_config.json -------------------------------------------------------------------------------- /tests/fixtures/model/vocab.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyuxinwhy/uniem/HEAD/tests/fixtures/model/vocab.txt -------------------------------------------------------------------------------- /tests/test_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyuxinwhy/uniem/HEAD/tests/test_data.py -------------------------------------------------------------------------------- /tests/test_data_structures.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyuxinwhy/uniem/HEAD/tests/test_data_structures.py -------------------------------------------------------------------------------- /tests/test_finetuner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyuxinwhy/uniem/HEAD/tests/test_finetuner.py -------------------------------------------------------------------------------- /tests/test_loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyuxinwhy/uniem/HEAD/tests/test_loss.py -------------------------------------------------------------------------------- /tests/test_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyuxinwhy/uniem/HEAD/tests/test_model.py -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyuxinwhy/uniem/HEAD/tests/test_utils.py -------------------------------------------------------------------------------- /uniem/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyuxinwhy/uniem/HEAD/uniem/__init__.py -------------------------------------------------------------------------------- /uniem/criteria.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyuxinwhy/uniem/HEAD/uniem/criteria.py -------------------------------------------------------------------------------- /uniem/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyuxinwhy/uniem/HEAD/uniem/data.py -------------------------------------------------------------------------------- /uniem/data_structures.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyuxinwhy/uniem/HEAD/uniem/data_structures.py -------------------------------------------------------------------------------- /uniem/finetuner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyuxinwhy/uniem/HEAD/uniem/finetuner.py -------------------------------------------------------------------------------- /uniem/integration/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /uniem/integration/sentence_transformers_wrapper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyuxinwhy/uniem/HEAD/uniem/integration/sentence_transformers_wrapper.py -------------------------------------------------------------------------------- /uniem/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyuxinwhy/uniem/HEAD/uniem/model.py -------------------------------------------------------------------------------- /uniem/trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyuxinwhy/uniem/HEAD/uniem/trainer.py -------------------------------------------------------------------------------- /uniem/training_strategy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyuxinwhy/uniem/HEAD/uniem/training_strategy.py -------------------------------------------------------------------------------- /uniem/types.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyuxinwhy/uniem/HEAD/uniem/types.py -------------------------------------------------------------------------------- /uniem/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangyuxinwhy/uniem/HEAD/uniem/utils.py -------------------------------------------------------------------------------- /uniem/version.py: -------------------------------------------------------------------------------- 1 | __version__ = '0.3.3' 2 | --------------------------------------------------------------------------------