├── LICENSE
├── README.md
├── app.py
├── asserts
│   ├── HeartLink.gif
│   ├── HeartLink.png
│   ├── HeartLink_digitalhuman.gif
│   ├── chart.png
│   └── logo.jpg
├── datasets
│   └── README.md
├── demo
│   ├── TTS
│   │   ├── GPT_SoVITS
│   │   │   ├── AR
│   │   │   │   ├── __init__.py
│   │   │   │   ├── data
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── __pycache__
│   │   │   │   │   │   ├── __init__.cpython-310.pyc
│   │   │   │   │   │   ├── bucket_sampler.cpython-310.pyc
│   │   │   │   │   │   ├── data_module.cpython-310.pyc
│   │   │   │   │   │   └── dataset.cpython-310.pyc
│   │   │   │   │   ├── bucket_sampler.py
│   │   │   │   │   ├── data_module.py
│   │   │   │   │   └── dataset.py
│   │   │   │   ├── models
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── __pycache__
│   │   │   │   │   │   ├── __init__.cpython-310.pyc
│   │   │   │   │   │   ├── t2s_lightning_module.cpython-310.pyc
│   │   │   │   │   │   ├── t2s_model.cpython-310.pyc
│   │   │   │   │   │   └── utils.cpython-310.pyc
│   │   │   │   │   ├── t2s_lightning_module.py
│   │   │   │   │   ├── t2s_lightning_module_onnx.py
│   │   │   │   │   ├── t2s_model.py
│   │   │   │   │   ├── t2s_model_onnx.py
│   │   │   │   │   └── utils.py
│   │   │   │   ├── modules
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── __pycache__
│   │   │   │   │   │   ├── __init__.cpython-310.pyc
│   │   │   │   │   │   ├── activation.cpython-310.pyc
│   │   │   │   │   │   ├── embedding.cpython-310.pyc
│   │   │   │   │   │   ├── lr_schedulers.cpython-310.pyc
│   │   │   │   │   │   ├── optim.cpython-310.pyc
│   │   │   │   │   │   ├── patched_mha_with_cache.cpython-310.pyc
│   │   │   │   │   │   ├── scaling.cpython-310.pyc
│   │   │   │   │   │   └── transformer.cpython-310.pyc
│   │   │   │   │   ├── activation.py
│   │   │   │   │   ├── activation_onnx.py
│   │   │   │   │   ├── embedding.py
│   │   │   │   │   ├── embedding_onnx.py
│   │   │   │   │   ├── lr_schedulers.py
│   │   │   │   │   ├── optim.py
│   │   │   │   │   ├── patched_mha_with_cache.py
│   │   │   │   │   ├── patched_mha_with_cache_onnx.py
│   │   │   │   │   ├── scaling.py
│   │   │   │   │   ├── transformer.py
│   │   │   │   │   └── transformer_onnx.py
│   │   │   │   ├── text_processing
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── phonemizer.py
│   │   │   │   │   └── symbols.py
│   │   │   │   └── utils
│   │   │   │       ├── __init__.py
│   │   │   │       ├── __pycache__
│   │   │   │       │   ├── __init__.cpython-310.pyc
│   │   │   │       │   └── io.cpython-310.pyc
│   │   │   │       ├── initialize.py
│   │   │   │       └── io.py
│   │   │   ├── __init__.py
│   │   │   ├── cankao.wav
│   │   │   ├── cankao2.wav
│   │   │   ├── feature_extractor
│   │   │   │   ├── __init__.py
│   │   │   │   ├── cnhubert.py
│   │   │   │   └── whisper_enc.py
│   │   │   ├── module
│   │   │   │   ├── __init__.py
│   │   │   │   ├── attentions.py
│   │   │   │   ├── attentions_onnx.py
│   │   │   │   ├── commons.py
│   │   │   │   ├── core_vq.py
│   │   │   │   ├── data_utils.py
│   │   │   │   ├── losses.py
│   │   │   │   ├── mel_processing.py
│   │   │   │   ├── models.py
│   │   │   │   ├── models_onnx.py
│   │   │   │   ├── modules.py
│   │   │   │   ├── mrte_model.py
│   │   │   │   ├── quantize.py
│   │   │   │   └── transforms.py
│   │   │   ├── my_utils.py
│   │   │   ├── output.wav
│   │   │   ├── text
│   │   │   │   ├── __init__.py
│   │   │   │   ├── chinese.py
│   │   │   │   ├── cleaner.py
│   │   │   │   ├── cmudict-fast.rep
│   │   │   │   ├── cmudict.rep
│   │   │   │   ├── engdict-hot.rep
│   │   │   │   ├── engdict_cache.pickle
│   │   │   │   ├── english.py
│   │   │   │   ├── japanese.py
│   │   │   │   ├── namedict_cache.pickle
│   │   │   │   ├── opencpop-strict.txt
│   │   │   │   ├── symbols.py
│   │   │   │   ├── tone_sandhi.py
│   │   │   │   └── zh_normalization
│   │   │   │       ├── README.md
│   │   │   │       ├── __init__.py
│   │   │   │       ├── __pycache__
│   │   │   │       │   ├── __init__.cpython-310.pyc
│   │   │   │       │   ├── char_convert.cpython-310.pyc
│   │   │   │       │   ├── chronology.cpython-310.pyc
│   │   │   │       │   ├── constants.cpython-310.pyc
│   │   │   │       │   ├── num.cpython-310.pyc
│   │   │   │       │   ├── phonecode.cpython-310.pyc
│   │   │   │       │   ├── quantifier.cpython-310.pyc
│   │   │   │       │   └── text_normlization.cpython-310.pyc
│   │   │   │       ├── char_convert.py
│   │   │   │       ├── chronology.py
│   │   │   │       ├── constants.py
│   │   │   │       ├── num.py
│   │   │   │       ├── phonecode.py
│   │   │   │       ├── quantifier.py
│   │   │   │       └── text_normlization.py
│   │   │   ├── tts.py
│   │   │   └── utils.py
│   │   ├── TEMP
│   │   │   ├── jieba.cache
│   │   │   └── tmp_s2.json
│   │   ├── __init__.py
│   │   ├── config.py
│   │   ├── data_process.py
│   │   ├── i18n
│   │   │   └── locale
│   │   │       ├── en_US.json
│   │   │       ├── es_ES.json
│   │   │       ├── fr_FR.json
│   │   │       ├── it_IT.json
│   │   │       ├── ja_JP.json
│   │   │       ├── ko_KR.json
│   │   │       ├── pt_BR.json
│   │   │       ├── ru_RU.json
│   │   │       ├── tr_TR.json
│   │   │       ├── zh_CN.json
│   │   │       ├── zh_HK.json
│   │   │       ├── zh_SG.json
│   │   │       └── zh_TW.json
│   │   └── webui.py
│   ├── __init__.py
│   ├── app.py
│   ├── asserts
│   │   └── logo.jpg
│   └── config.py
├── finetune_config
│   └── xtuner_config
│       └── README.md
├── nltk_data
│   ├── corpora
│   │   ├── cmudict.zip
│   │   └── cmudict
│   │       ├── README
│   │       └── cmudict
│   └── taggers
│       ├── averaged_perceptron_tagger.zip
│       └── averaged_perceptron_tagger
│           └── averaged_perceptron_tagger.pickle
└── requirements.txt
/README.md:
--------------------------------------------------------------------------------
# HeartLink - A Large Language Model for Psychological Empathy

[... the README body and the first ~163 lines of the project's Streamlit chat app are missing from this dump; the remainder of this section is the tail of that app ...]

    loading_placeholder.markdown("""
        (loading-indicator HTML/CSS elided by the dump; it centers a "正在生成文本,请稍等" / "Generating text, please wait" notice via justify-content: center)
    """, unsafe_allow_html=True)
    print(st.session_state.messages)
    while True:
        items = ''  # reset each attempt so a failed parse does not leak into the next stream
        for item in pipe.stream_infer(prompts=prompts, gen_config=tb_generation_config):
            items += item.text
            print(item.text, end='')
        try:
            response = json.loads(items)["共情回复"]
            emotion = json.loads(items)["情绪"].replace("，", ",")  # normalize full-width commas
            break
        except (json.JSONDecodeError, KeyError):  # malformed output: regenerate until it parses
            continue

    loading_placeholder.empty()
    message_placeholder.markdown(response)

    with st.spinner("正在生成语音,请稍等~"):
        sr, audio_io = get_tts_wav(ref_wav_path=ref_wav_path, prompt_text=prompt_text,
                                   prompt_language=prompt_language, text=response, text_language=text_language,
                                   tokenizer=tokenizer, bert_model=bert_model, ssl_model=ssl_model,
                                   vq_model=vq_model, hps=hps, t2s_model=t2s_model, max_sec=max_sec,
                                   )
    try:
        st.audio(data=audio_io, format="audio/wav", autoplay=True)
    except TypeError:  # older Streamlit versions do not accept the autoplay argument
        st.audio(data=audio_io, format="audio/wav")

    emotions = emotion.split(',')

    try:
        # carry the running emotion counts over from the previous assistant turn
        tmp = copy.deepcopy(st.session_state.messages[-2]["emotions"])
        for e in emotions:
            tmp[e] = tmp.get(e, 0) + 1
    except (IndexError, KeyError):  # first turn: no previous counts to extend
        tmp = {e: 1 for e in emotions}

    st.session_state.messages.append({
        'role': 'assistant',
        'content': items,
        'wav': audio_io,
        'emotions': tmp,
        'avatar': '/home/xlab-app-center/demo/asserts/logo.jpg',
    })

    with st.sidebar:
        st.subheader("情绪分析图表")
        df = pd.DataFrame(list(tmp.items()), columns=['Emotion', 'Count'])
        chart = alt.Chart(df).mark_bar(size=50).encode(
            x=alt.X('Count:Q', title='Count'),
            y=alt.Y('Emotion:N', title='Emotion', axis=alt.Axis(labelAngle=0)),
            color=alt.Color('Emotion:N', legend=None)
        ).properties(
            width=400,
            height=400
        ).interactive()
        with st.container(height=400, border=True):
            st.altair_chart(chart, use_container_width=True)

    torch.cuda.empty_cache()


if __name__ == '__main__':
    main()

--------------------------------------------------------------------------------
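
For reference, the retry loop above regenerates until the model emits valid JSON carrying the keys 共情回复 (empathetic reply) and 情绪 (emotion tags). A minimal illustration of the expected shape (the field values here are invented, not taken from the repo):

```python
import json

# hypothetical model output in the format the demo parses
items = '{"共情回复": "听起来你最近压力很大,我会一直陪着你。", "情绪": "焦虑，疲惫"}'

data = json.loads(items)
response = data["共情回复"]                            # reply rendered in the chat window
emotions = data["情绪"].replace("，", ",").split(",")  # normalize full-width commas, then split the tags
print(response, emotions)
```
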
/demo/asserts/logo.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nobody-ML/HeartLink/317c04bd94f834ce78d3e16bc6a62ee6e3f6f383/demo/asserts/logo.jpg
--------------------------------------------------------------------------------
/demo/config.py:
--------------------------------------------------------------------------------
from lmdeploy import pipeline, GenerationConfig, TurbomindEngineConfig, ChatTemplateConfig
import os

#######################################################################
#                          PART 1  lmdeploy                           #
#######################################################################
SYSTEM = os.getenv("SYSTEM")  # system prompt (meta instruction), supplied via environment variable

IS_TURBOMIND = True
IS_PYTORCH = False

backend_config = TurbomindEngineConfig(cache_max_entry_count=0.3)  # reserve ~30% of GPU memory for the k/v cache
chat_template_config = ChatTemplateConfig(model_name='internlm2', meta_instruction=SYSTEM)

#######################################################################
#                            PART 2  TTS                              #
#######################################################################
prompt_text = "胡桃的胡是胡吃海喝的胡,胡桃的桃却不是淘气的淘!嘿嘿…不、不好笑吗?"  # transcript of the reference audio below
prompt_language = "中文"  # language of the reference transcript (Chinese)
text_language = "中文"    # language of the text to be synthesized (Chinese)
ref_wav_path = "/home/xlab-app-center/demo/TTS/GPT_SoVITS/cankao2.wav"  # GPT-SoVITS reference audio

--------------------------------------------------------------------------------
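
For context, the demo's app.py consumes these objects when building its lmdeploy pipeline. A minimal sketch of that wiring; the model path and sampling settings below are assumptions for illustration, not values from the repo:

```python
from lmdeploy import pipeline, GenerationConfig
from config import backend_config, chat_template_config  # objects defined in demo/config.py above

# hypothetical path to the fine-tuned, merged model
pipe = pipeline('/root/model/internlm2-chat-7b-heartlink',
                backend_config=backend_config,
                chat_template_config=chat_template_config)

gen_config = GenerationConfig(top_p=0.8, temperature=0.8)  # illustrative sampling settings
for item in pipe.stream_infer(prompts='你好', gen_config=gen_config):
    print(item.text, end='')
```
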
/finetune_config/xtuner_config/README.md:
--------------------------------------------------------------------------------
# Fine-tuning Guide Based on XTuner

## 1. Environment Setup

- Fine-tuning hardware: a 7B model requires an A100 (40 GB)


- First create a Python 3.10 virtual environment with conda

```bash
conda create --name finetune_xtuner python=3.10 -y
conda activate finetune_xtuner
```

- Install XTuner via pip:

```shell
pip install -U xtuner
```

Alternatively, install it with DeepSpeed integrated:

```shell
pip install -U 'xtuner[deepspeed]'
```

- Or install XTuner from source:

```shell
git clone https://github.com/InternLM/xtuner.git
cd xtuner
pip install -e '.[all]'
```
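
To verify the installation, a quick sanity check (assuming the package exposes `__version__`, as recent XTuner releases do):

```shell
python -c "import xtuner; print(xtuner.__version__)"
xtuner list-cfg | head   # should list built-in config names without error
```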


## 2. Fine-tuning Configuration
- Enter the config directory

```shell
cd xtuner_config
```
- View the models supported by XTuner
```shell
xtuner list-cfg
```
- Copy the target model's config file and edit its parameters (typical fields are sketched below)
```shell
xtuner copy-cfg internlm2_chat_7b_qlora_oasst1_e3 .
mv internlm2_chat_7b_qlora_oasst1_e3.py internlm2_chat_7b_qlora.py
vim internlm2_chat_7b_qlora.py
```
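
The fields most commonly edited live in the Settings part of the copied config. A representative excerpt; the field names follow XTuner's config convention, while the values (in particular the dataset path) are illustrative assumptions:

```python
# Settings section of internlm2_chat_7b_qlora.py (illustrative values)
pretrained_model_name_or_path = '/root/model/Shanghai_AI_Laboratory/internlm2-chat-7b'  # base model
data_path = './datasets/empathy_dialogues.json'  # hypothetical fine-tuning dataset
max_length = 2048         # samples longer than this are truncated
batch_size = 1            # per-device batch size
accumulative_counts = 16  # gradient accumulation steps
max_epochs = 3
lr = 2e-4                 # LoRA learning rate
```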

## 3. Training
- Run the fine-tuning job
```shell
# single GPU
xtuner train /root/SoulStar/finetune_config/xtuner_config/internlm2_chat_7b_qlora.py --deepspeed deepspeed_zero2
```
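
For multi-GPU training, XTuner's launcher reads the NPROC_PER_NODE environment variable; a sketch (the GPU count here is assumed):

```shell
# e.g. 2 GPUs on one node
NPROC_PER_NODE=2 xtuner train /root/SoulStar/finetune_config/xtuner_config/internlm2_chat_7b_qlora.py --deepspeed deepspeed_zero2
```
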
- `--deepspeed` means training is optimized with [DeepSpeed](https://github.com/microsoft/DeepSpeed) 🚀. XTuner ships several built-in strategies, including ZeRO-1, ZeRO-2, and ZeRO-3. To disable this feature, simply remove the flag.

- For more examples, see the [documentation](https://github.com/InternLM/xtuner/blob/main/docs/zh_cn/user_guides/finetune.md).

## 4. Weight Conversion and Merging
- Convert pth-format weights to Hugging Face format
```shell
# create an hf folder to hold the Hugging Face format weights
mkdir /root/SoulStar/finetune_config/xtuner_config/work_dirs/hf

export MKL_SERVICE_FORCE_INTEL=1

# location of the config file
export CONFIG_NAME_OR_PATH=/root/SoulStar/finetune_config/xtuner_config/internlm2_chat_7b_qlora.py

# location of the pth weights produced by training
export PTH=/root/SoulStar/finetune_config/xtuner_config/work_dirs/internlm2_chat_7b_qlora/iter_2500.pth

# where the converted Hugging Face weights will be stored
export SAVE_PATH=/root/SoulStar/finetune_config/xtuner_config/work_dirs/hf

# run the conversion
xtuner convert pth_to_hf $CONFIG_NAME_OR_PATH $PTH $SAVE_PATH
```

- Merge the Hugging Face format adapter weights into the base model
```shell
export MKL_SERVICE_FORCE_INTEL=1
export MKL_THREADING_LAYER='GNU'

# location of the original base-model weights
export NAME_OR_PATH_TO_LLM=/root/model/Shanghai_AI_Laboratory/internlm2-chat-7b

# location of the Hugging Face format adapter weights
export NAME_OR_PATH_TO_ADAPTER=/root/SoulStar/finetune_config/xtuner_config/work_dirs/hf

# where the final merged weights will be stored
mkdir /root/model/internlm2-chat-7b-soulstar
export SAVE_PATH=/root/model/internlm2-chat-7b-soulstar

# run the merge
xtuner convert merge \
    $NAME_OR_PATH_TO_LLM \
    $NAME_OR_PATH_TO_ADAPTER \
    $SAVE_PATH \
    --max-shard-size 2GB
```

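After merging, the result can be smoke-tested from the command line with XTuner's chat tool (the prompt template must match the base model; `internlm2_chat` is assumed here):

```shell
xtuner chat /root/model/internlm2-chat-7b-soulstar --prompt-template internlm2_chat
```
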
--------------------------------------------------------------------------------
/nltk_data/corpora/cmudict.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nobody-ML/HeartLink/317c04bd94f834ce78d3e16bc6a62ee6e3f6f383/nltk_data/corpora/cmudict.zip
--------------------------------------------------------------------------------
/nltk_data/corpora/cmudict/README:
--------------------------------------------------------------------------------
The Carnegie Mellon Pronouncing Dictionary [cmudict.0.7a]

ftp://ftp.cs.cmu.edu/project/speech/dict/
https://cmusphinx.svn.sourceforge.net/svnroot/cmusphinx/trunk/cmudict/cmudict.0.7a

Copyright (C) 1993-2008 Carnegie Mellon University. All rights reserved.

File Format: Each line consists of an uppercased word,
a counter (for alternative pronunciations), and a transcription.
Vowels are marked for stress (1=primary, 2=secondary, 0=no stress).
E.g.: NATURAL 1 N AE1 CH ER0 AH0 L

The dictionary contains 127069 entries. Of these, 119400 words are assigned
a unique pronunciation, 6830 words have two pronunciations, and 839 words have
three or more pronunciations. Many of these are fast-speech variants.

Phonemes: There are 39 phonemes, as shown below:

Phoneme Example Translation    Phoneme Example Translation
------- ------- -----------    ------- ------- -----------
AA      odd     AA D           AE      at      AE T
AH      hut     HH AH T        AO      ought   AO T
AW      cow     K AW           AY      hide    HH AY D
B       be      B IY           CH      cheese  CH IY Z
D       dee     D IY           DH      thee    DH IY
EH      Ed      EH D           ER      hurt    HH ER T
EY      ate     EY T           F       fee     F IY
G       green   G R IY N       HH      he      HH IY
IH      it      IH T           IY      eat     IY T
JH      gee     JH IY          K       key     K IY
L       lee     L IY           M       me      M IY
N       knee    N IY           NG      ping    P IH NG
OW      oat     OW T           OY      toy     T OY
P       pee     P IY           R       read    R IY D
S       sea     S IY           SH      she     SH IY
T       tea     T IY           TH      theta   TH EY T AH
UH      hood    HH UH D        UW      two     T UW
V       vee     V IY           W       we      W IY
Y       yield   Y IY L D       Z       zee     Z IY
ZH      seizure S IY ZH ER

(For NLTK, entries have been sorted so that, e.g. FIRE 1 and FIRE 2
are contiguous, and not separated by FIRE'S 1.)

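Given the format above, entries can be queried through NLTK. A minimal sketch, assuming this repository's nltk_data directory is added to NLTK's search path:

```python
import nltk
nltk.data.path.append("./nltk_data")  # assumed relative path to the bundled data

from nltk.corpus import cmudict

pron = cmudict.dict()   # maps lowercased words to lists of pronunciations
print(pron["natural"])  # [['N', 'AE1', 'CH', 'ER0', 'AH0', 'L']]
```
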
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:

1. Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
   The contents of this file are deemed to be source code.

2. Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in
   the documentation and/or other materials provided with the
   distribution.

This work was supported in part by funding from the Defense Advanced
Research Projects Agency, the Office of Naval Research and the National
Science Foundation of the United States of America, and by member
companies of the Carnegie Mellon Sphinx Speech Consortium. We acknowledge
the contributions of many volunteers to the expansion and improvement of
this dictionary.

THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

--------------------------------------------------------------------------------
/nltk_data/taggers/averaged_perceptron_tagger.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nobody-ML/HeartLink/317c04bd94f834ce78d3e16bc6a62ee6e3f6f383/nltk_data/taggers/averaged_perceptron_tagger.zip
--------------------------------------------------------------------------------
/nltk_data/taggers/averaged_perceptron_tagger/averaged_perceptron_tagger.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nobody-ML/HeartLink/317c04bd94f834ce78d3e16bc6a62ee6e3f6f383/nltk_data/taggers/averaged_perceptron_tagger/averaged_perceptron_tagger.pickle
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
numpy
scipy
tensorboard
librosa==0.9.2
numba==0.56.4
pytorch-lightning
# gradio==3.38.0
# gradio_client==0.8.1
ffmpeg-python
onnxruntime
tqdm
funasr==1.0.0
cn2an
pypinyin
pyopenjtalk
g2p_en
torchaudio
modelscope==1.10.0
sentencepiece
transformers
chardet
PyYAML
psutil
jieba_fast
jieba
LangSegment>=0.2.0
Faster_Whisper
wordsegment
lmdeploy==0.4.2
streamlit==1.35.0

--------------------------------------------------------------------------------
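
A typical environment setup for the demo, assuming Python 3.10 (matching the bundled .pyc files) and a CUDA-capable GPU; the entry point is assumed to be the top-level app.py:

```shell
conda create -n heartlink python=3.10 -y
conda activate heartlink
pip install -r requirements.txt
streamlit run app.py
```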