├── .gitignore ├── README.md ├── anygpt └── src │ ├── __init__.py │ ├── infer │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-39.pyc │ │ ├── pre_post_process.cpython-39.pyc │ │ └── voice_clone.cpython-39.pyc │ ├── cli_infer_base_model.py │ ├── cli_infer_chat_model.py │ ├── pre_post_process.py │ └── voice_clone.py │ ├── m_utils │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-39.pyc │ │ ├── anything2token.cpython-39.pyc │ │ ├── conversation.cpython-39.pyc │ │ ├── instructions.cpython-39.pyc │ │ ├── other2text_instructions.cpython-39.pyc │ │ ├── prompter.cpython-39.pyc │ │ ├── read_modality.cpython-39.pyc │ │ └── text2other_instructions.cpython-39.pyc │ ├── anything2token.py │ ├── conversation.py │ ├── instructions.py │ ├── other2text_instructions.py │ ├── prompter.py │ ├── read_modality.py │ ├── speech_utils.py │ ├── text2other_instructions.py │ └── transforms.py │ └── train │ ├── __init__.py │ ├── stage1_pretrain.py │ └── stage2_sft.py ├── config ├── generate_config.json ├── image_generate_config.json ├── music_generate_config.json ├── speech_generate_config.json └── text_generate_config.json ├── data ├── instruction │ └── anyinstruct_speech.jsonl └── pretrain │ ├── image │ ├── journeydb.jsonl │ ├── laion-coco-caption.jsonl │ ├── laion2b.jsonl │ └── laion_aesthetics_6plus_8m.jsonl │ ├── music │ └── music-1m.jsonl │ └── speech │ ├── commonvoice.jsonl │ ├── gigaspeech.jsonl │ └── mls.jsonl ├── requirements.txt ├── scripts ├── cli_infer_base_model.sh ├── cli_infer_chat_model.sh ├── stage1_pretrain.sh └── stage2_sft.sh ├── seed2 ├── __init__.py ├── llama_xformer.py ├── model_tools.py ├── pipeline_stable_unclip_img2img.py ├── seed_llama_tokenizer.py ├── seed_qformer │ ├── blip2.py │ ├── clip_vit.py │ ├── eva_vit.py │ ├── qformer_causual.py │ ├── qformer_quantizer.py │ ├── utils.py │ └── vit.py └── transforms.py ├── soundstorm_speechtokenizer ├── __init__.py ├── attend.py ├── dataset.py ├── optimizer.py ├── soundstorm.py ├── tracking.py └── trainer.py └── static ├── images ├── logo.png └── model1.jpg └── infer ├── image └── cat.jpg ├── music └── features an indie rock sound with distinct element.wav └── speech ├── instruction ├── Can you draw me a picture of a sunny beach.wav └── Give me a similar style of music.wav ├── voice_prompt1.wav ├── voice_prompt2.wav └── voice_prompt3.wav /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/.gitignore -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/README.md -------------------------------------------------------------------------------- /anygpt/src/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /anygpt/src/infer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /anygpt/src/infer/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/anygpt/src/infer/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /anygpt/src/infer/__pycache__/pre_post_process.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/anygpt/src/infer/__pycache__/pre_post_process.cpython-39.pyc -------------------------------------------------------------------------------- /anygpt/src/infer/__pycache__/voice_clone.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/anygpt/src/infer/__pycache__/voice_clone.cpython-39.pyc -------------------------------------------------------------------------------- /anygpt/src/infer/cli_infer_base_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/anygpt/src/infer/cli_infer_base_model.py -------------------------------------------------------------------------------- /anygpt/src/infer/cli_infer_chat_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/anygpt/src/infer/cli_infer_chat_model.py -------------------------------------------------------------------------------- /anygpt/src/infer/pre_post_process.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/anygpt/src/infer/pre_post_process.py -------------------------------------------------------------------------------- /anygpt/src/infer/voice_clone.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/anygpt/src/infer/voice_clone.py -------------------------------------------------------------------------------- /anygpt/src/m_utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /anygpt/src/m_utils/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/anygpt/src/m_utils/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /anygpt/src/m_utils/__pycache__/anything2token.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/anygpt/src/m_utils/__pycache__/anything2token.cpython-39.pyc -------------------------------------------------------------------------------- /anygpt/src/m_utils/__pycache__/conversation.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/anygpt/src/m_utils/__pycache__/conversation.cpython-39.pyc -------------------------------------------------------------------------------- /anygpt/src/m_utils/__pycache__/instructions.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/anygpt/src/m_utils/__pycache__/instructions.cpython-39.pyc -------------------------------------------------------------------------------- /anygpt/src/m_utils/__pycache__/other2text_instructions.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/anygpt/src/m_utils/__pycache__/other2text_instructions.cpython-39.pyc -------------------------------------------------------------------------------- /anygpt/src/m_utils/__pycache__/prompter.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/anygpt/src/m_utils/__pycache__/prompter.cpython-39.pyc -------------------------------------------------------------------------------- /anygpt/src/m_utils/__pycache__/read_modality.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/anygpt/src/m_utils/__pycache__/read_modality.cpython-39.pyc -------------------------------------------------------------------------------- /anygpt/src/m_utils/__pycache__/text2other_instructions.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/anygpt/src/m_utils/__pycache__/text2other_instructions.cpython-39.pyc -------------------------------------------------------------------------------- /anygpt/src/m_utils/anything2token.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/anygpt/src/m_utils/anything2token.py -------------------------------------------------------------------------------- /anygpt/src/m_utils/conversation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/anygpt/src/m_utils/conversation.py -------------------------------------------------------------------------------- /anygpt/src/m_utils/instructions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/anygpt/src/m_utils/instructions.py -------------------------------------------------------------------------------- /anygpt/src/m_utils/other2text_instructions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/anygpt/src/m_utils/other2text_instructions.py -------------------------------------------------------------------------------- /anygpt/src/m_utils/prompter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/anygpt/src/m_utils/prompter.py -------------------------------------------------------------------------------- /anygpt/src/m_utils/read_modality.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/anygpt/src/m_utils/read_modality.py -------------------------------------------------------------------------------- /anygpt/src/m_utils/speech_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/anygpt/src/m_utils/speech_utils.py -------------------------------------------------------------------------------- /anygpt/src/m_utils/text2other_instructions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/anygpt/src/m_utils/text2other_instructions.py -------------------------------------------------------------------------------- /anygpt/src/m_utils/transforms.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/anygpt/src/m_utils/transforms.py -------------------------------------------------------------------------------- /anygpt/src/train/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /anygpt/src/train/stage1_pretrain.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/anygpt/src/train/stage1_pretrain.py -------------------------------------------------------------------------------- /anygpt/src/train/stage2_sft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/anygpt/src/train/stage2_sft.py -------------------------------------------------------------------------------- /config/generate_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/config/generate_config.json -------------------------------------------------------------------------------- /config/image_generate_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/config/image_generate_config.json -------------------------------------------------------------------------------- /config/music_generate_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/config/music_generate_config.json -------------------------------------------------------------------------------- /config/speech_generate_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/config/speech_generate_config.json -------------------------------------------------------------------------------- /config/text_generate_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/config/text_generate_config.json -------------------------------------------------------------------------------- /data/instruction/anyinstruct_speech.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/data/instruction/anyinstruct_speech.jsonl -------------------------------------------------------------------------------- /data/pretrain/image/journeydb.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/data/pretrain/image/journeydb.jsonl -------------------------------------------------------------------------------- /data/pretrain/image/laion-coco-caption.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/data/pretrain/image/laion-coco-caption.jsonl -------------------------------------------------------------------------------- /data/pretrain/image/laion2b.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/data/pretrain/image/laion2b.jsonl -------------------------------------------------------------------------------- /data/pretrain/image/laion_aesthetics_6plus_8m.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/data/pretrain/image/laion_aesthetics_6plus_8m.jsonl -------------------------------------------------------------------------------- /data/pretrain/music/music-1m.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/data/pretrain/music/music-1m.jsonl -------------------------------------------------------------------------------- /data/pretrain/speech/commonvoice.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/data/pretrain/speech/commonvoice.jsonl -------------------------------------------------------------------------------- /data/pretrain/speech/gigaspeech.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/data/pretrain/speech/gigaspeech.jsonl -------------------------------------------------------------------------------- /data/pretrain/speech/mls.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/data/pretrain/speech/mls.jsonl -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/requirements.txt -------------------------------------------------------------------------------- /scripts/cli_infer_base_model.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/scripts/cli_infer_base_model.sh -------------------------------------------------------------------------------- /scripts/cli_infer_chat_model.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/scripts/cli_infer_chat_model.sh -------------------------------------------------------------------------------- /scripts/stage1_pretrain.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/scripts/stage1_pretrain.sh -------------------------------------------------------------------------------- /scripts/stage2_sft.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/scripts/stage2_sft.sh -------------------------------------------------------------------------------- /seed2/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /seed2/llama_xformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/seed2/llama_xformer.py -------------------------------------------------------------------------------- /seed2/model_tools.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/seed2/model_tools.py -------------------------------------------------------------------------------- /seed2/pipeline_stable_unclip_img2img.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/seed2/pipeline_stable_unclip_img2img.py -------------------------------------------------------------------------------- /seed2/seed_llama_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/seed2/seed_llama_tokenizer.py -------------------------------------------------------------------------------- /seed2/seed_qformer/blip2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/seed2/seed_qformer/blip2.py -------------------------------------------------------------------------------- /seed2/seed_qformer/clip_vit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/seed2/seed_qformer/clip_vit.py -------------------------------------------------------------------------------- /seed2/seed_qformer/eva_vit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/seed2/seed_qformer/eva_vit.py -------------------------------------------------------------------------------- /seed2/seed_qformer/qformer_causual.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/seed2/seed_qformer/qformer_causual.py -------------------------------------------------------------------------------- /seed2/seed_qformer/qformer_quantizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/seed2/seed_qformer/qformer_quantizer.py -------------------------------------------------------------------------------- /seed2/seed_qformer/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/seed2/seed_qformer/utils.py -------------------------------------------------------------------------------- /seed2/seed_qformer/vit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/seed2/seed_qformer/vit.py -------------------------------------------------------------------------------- /seed2/transforms.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/seed2/transforms.py -------------------------------------------------------------------------------- /soundstorm_speechtokenizer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/soundstorm_speechtokenizer/__init__.py -------------------------------------------------------------------------------- /soundstorm_speechtokenizer/attend.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/soundstorm_speechtokenizer/attend.py -------------------------------------------------------------------------------- /soundstorm_speechtokenizer/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/soundstorm_speechtokenizer/dataset.py -------------------------------------------------------------------------------- /soundstorm_speechtokenizer/optimizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/soundstorm_speechtokenizer/optimizer.py -------------------------------------------------------------------------------- /soundstorm_speechtokenizer/soundstorm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/soundstorm_speechtokenizer/soundstorm.py -------------------------------------------------------------------------------- /soundstorm_speechtokenizer/tracking.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/soundstorm_speechtokenizer/tracking.py -------------------------------------------------------------------------------- /soundstorm_speechtokenizer/trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/soundstorm_speechtokenizer/trainer.py -------------------------------------------------------------------------------- /static/images/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/static/images/logo.png -------------------------------------------------------------------------------- /static/images/model1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/static/images/model1.jpg -------------------------------------------------------------------------------- /static/infer/image/cat.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/static/infer/image/cat.jpg -------------------------------------------------------------------------------- /static/infer/music/features an indie rock sound with distinct element.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/static/infer/music/features an indie rock sound with distinct element.wav -------------------------------------------------------------------------------- /static/infer/speech/instruction/Can you draw me a picture of a sunny beach.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/static/infer/speech/instruction/Can you draw me a picture of a sunny beach.wav -------------------------------------------------------------------------------- /static/infer/speech/instruction/Give me a similar style of music.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/static/infer/speech/instruction/Give me a similar style of music.wav -------------------------------------------------------------------------------- /static/infer/speech/voice_prompt1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/static/infer/speech/voice_prompt1.wav -------------------------------------------------------------------------------- /static/infer/speech/voice_prompt2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/static/infer/speech/voice_prompt2.wav -------------------------------------------------------------------------------- /static/infer/speech/voice_prompt3.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenMOSS/AnyGPT/HEAD/static/infer/speech/voice_prompt3.wav --------------------------------------------------------------------------------