├── .DS_Store
├── .github
    └── workflows
    │   ├── .github-actions-demo.yml.un~
    │   ├── github-actions-demo.yml~
    │   └── python-app.yml
├── .gitignore
├── .trunk
    ├── .gitignore
    ├── configs
    │   ├── .isort.cfg
    │   ├── .markdownlint.yaml
    │   ├── .yamllint.yaml
    │   └── ruff.toml
    └── trunk.yaml
├── .vscode
    └── settings.json
├── README.md
├── data
    ├── Mental_Health_FAQ.csv
    └── transcript2
├── embeddings.index
├── main.py
├── notebooks
    ├── chatbot.ipynb
    ├── finetuning.ipynb
    ├── hope_notebook.ipynb
    ├── mental_health_kaggle_noteboook.ipynb
    ├── playground.ipynb
    └── youtube_transcripts.ipynb
├── rag_context.py
└── requirements.txt


/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MenduDev/Mendu_Chatbot_prototype/eaec8a199ce84ddc4c8f69f49fb626bcf1200cd5/.DS_Store


--------------------------------------------------------------------------------
/.github/workflows/.github-actions-demo.yml.un~:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MenduDev/Mendu_Chatbot_prototype/eaec8a199ce84ddc4c8f69f49fb626bcf1200cd5/.github/workflows/.github-actions-demo.yml.un~


--------------------------------------------------------------------------------
/.github/workflows/github-actions-demo.yml~:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MenduDev/Mendu_Chatbot_prototype/eaec8a199ce84ddc4c8f69f49fb626bcf1200cd5/.github/workflows/github-actions-demo.yml~


--------------------------------------------------------------------------------
/.github/workflows/python-app.yml:
--------------------------------------------------------------------------------
 1 | name: MentalHealth-Chatbot
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [ "main" ]
 6 |   pull_request:
 7 |     branches: [ "main" ]
 8 | 
 9 | permissions:
10 |   contents: read
11 | 
12 | jobs:
13 |   build:
14 | 
15 |     runs-on: ubuntu-latest
16 | 
17 |     steps:
18 |     - uses: actions/checkout@v3
19 |     - name: Set up Python 3.8
20 |       uses: actions/setup-python@v3
21 |       with:
22 |         python-version: "3.8"
23 |     - name: Install dependencies
24 |       run: |
25 |         python -m pip install --upgrade pip
26 |         pip install flake8 pytest
27 |         if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
28 |     - name: Lint with flake8
29 |       run: |
30 |         # stop the build if there are Python syntax errors or undefined names
31 |         flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
32 |         # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
33 |         flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
34 |     - name: Test with pytest
35 |       env:
36 |         OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
37 |       run: |
38 |         pytest
39 |         
40 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .env
 2 | Dockerfile
 3 | stream_app_minimum.py
 4 | joy.py
 5 | stream_lit_app.py_backup
 6 | .streamlit/
 7 | code_snippit_tests.ipynb
 8 | __pycache__/
 9 | *~
10 | 


--------------------------------------------------------------------------------
/.trunk/.gitignore:
--------------------------------------------------------------------------------
1 | *out
2 | *logs
3 | *actions
4 | *notifications
5 | *tools
6 | plugins
7 | user_trunk.yaml
8 | user.yaml
9 | 


--------------------------------------------------------------------------------
/.trunk/configs/.isort.cfg:
--------------------------------------------------------------------------------
1 | [settings]
2 | profile=black
3 | 


--------------------------------------------------------------------------------
/.trunk/configs/.markdownlint.yaml:
--------------------------------------------------------------------------------
 1 | # Autoformatter friendly markdownlint config (all formatting rules disabled)
 2 | default: true
 3 | blank_lines: false
 4 | bullet: false
 5 | html: false
 6 | indentation: false
 7 | line_length: false
 8 | spaces: false
 9 | url: false
10 | whitespace: false
11 | 


--------------------------------------------------------------------------------
/.trunk/configs/.yamllint.yaml:
--------------------------------------------------------------------------------
 1 | rules:
 2 |   quoted-strings:
 3 |     required: only-when-needed
 4 |     extra-allowed: ["{|}"]
 5 |   empty-values:
 6 |     forbid-in-block-mappings: true
 7 |     forbid-in-flow-mappings: true
 8 |   key-duplicates: {}
 9 |   octal-values:
10 |     forbid-implicit-octal: true
11 | 


--------------------------------------------------------------------------------
/.trunk/configs/ruff.toml:
--------------------------------------------------------------------------------
1 | # Generic, formatter-friendly config.
2 | select = ["B", "D3", "D4", "E", "F"]
3 | 
4 | # Never enforce `E501` (line length violations). This should be handled by formatters.
5 | ignore = ["E501"]
6 | 


--------------------------------------------------------------------------------
/.trunk/trunk.yaml:
--------------------------------------------------------------------------------
 1 | version: 0.1
 2 | cli:
 3 |   version: 1.16.0
 4 | plugins:
 5 |   sources:
 6 |     - id: trunk
 7 |       ref: v1.2.3
 8 |       uri: https://github.com/trunk-io/plugins
 9 | runtimes:
10 |   enabled:
11 |     - node@18.12.1
12 |     - python@3.10.8
13 | lint:
14 |   enabled:
15 |     - actionlint@1.6.25
16 |     - bandit@1.7.5
17 |     - black@23.9.1
18 |     - checkov@2.4.9
19 |     - git-diff-check
20 |     - isort@5.12.0
21 |     - markdownlint@0.36.0
22 |     - osv-scanner@1.4.0
23 |     - oxipng@8.0.0
24 |     - prettier@3.0.3
25 |     - ruff@0.0.289
26 |     - trivy@0.45.0
27 |     - trufflehog@3.55.1
28 |     - yamllint@1.32.0
29 | actions:
30 |   disabled:
31 |     - trunk-announce
32 |     - trunk-check-pre-push
33 |     - trunk-fmt-pre-commit
34 |   enabled:
35 |     - trunk-upgrade-available
36 | 


--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 |     "sonarlint.connectedMode.project": {
3 |         "connectionId": "lsacy",
4 |         "projectKey": "Lsacy_mentalHealth_Chatbot"
5 |     }
6 | }


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Mendu mental health CBT Chatbot with 'memory' function.
 2 | 
 3 | ## Features
 4 | The chatbot answers user-specific questions using RAG (retrieval-augmented generation): it retrieves user-specific data, general mental health guidelines, and past user conversations.
 5 | It has a context window of 4,096 tokens (gpt-3.5-turbo), which is roughly 200 sentences. Assuming a single therapy conversation lasts around 60-100 sentences, this context window should be sufficient for most cases.
 6 | 
 7 | In this prototype, we have one patient, whose name is Jill, an Afghanistan war veteran suffering from PTSD. This information is retrieved and fed into the chatbot, enabling it to recognize the patient and provide customized assistance.
 8 | The information about the patient and the general guidelines is stored in the ./data/ folder.
 9 | 
10 | ## Underlying technology
11 | - Uses OpenAI GPT-3.5 for natural-language generation.
12 | - Pulls user/patient information from .csv or .txt files, then cleans and merges it.
13 | - Uses the Jina AI embedding model (jinaai/jina-embedding-s-en-v1) to embed the textual information.
14 | - Uses FAISS for indexing and retrieval.
15 | 
16 | ## Installation
17 | 1. Install Python / conda.
18 | 2. Recommended: create a new conda environment.
19 | 3. Go to the chatbot folder where requirements.txt is located and, in a terminal, run: pip install -r requirements.txt
20 | 4. Create a file named .env and, inside it, save your OpenAI API key as: OPENAI_API_KEY = 'your-key-here'
21 | 5. Run python main.py to start the chatbot.
22 | 
23 | 
24 | 
25 | 
26 | To do:
27 | - implement a vector database for storage of all textual content (user data, conversations, guidelines)
28 | - implement a conversation summary tool to work around the 4,096-token context limit
29 | - implement 'Demi Flow' for more hardcoded responses from the chatbot
30 | 
31 | 
32 | 
33 | 
34 | 


--------------------------------------------------------------------------------
/data/transcript2:
--------------------------------------------------------------------------------
 1 | Jill, a 32-year-old Afghanistan war veteran, had been experiencing PTSD symptoms for more than five years. She consistently avoided thoughts and images related to witnessing her fellow service members being hit by an improvised explosive device (IED) while driving a combat supply truck. Over the years, Jill became increasingly depressed and began using alcohol on a daily basis to help assuage her PTSD symptoms. She had difficulties in her employment, missing many days of work, and she reported feeling disconnected and numb around her husband and children. In addition to a range of other PTSD symptoms, Jill had a recurring nightmare of the event in which she was the leader of a convoy and her lead truck broke down. She waved the second truck forward, the truck that hit the IED, while she and her fellow service members on the first truck worked feverishly to repair it. Consistent with the traumatic event, her nightmare included images of her and the service members on the first truck smiling and waving at those on the second truck, and the service members on the second truck making fun of the broken truck and their efforts to fix it — “Look at that piece of junk truck — good luck getting that clunker fixed.”
 2 | 
 3 | After a thorough assessment of her PTSD and comorbid symptoms, psychoeducation about PTSD symptoms, and a rationale for using trauma focused cognitive interventions, Jill received 10 sessions of cognitive therapy for PTSD. She was first assigned cognitive worksheets to begin self-monitoring events, her thoughts about these events, and consequent feelings. These worksheets were used to sensitize Jill to the types of cognitions that she was having about current day events and to appraisals that she had about the explosion. For example, one of the thoughts she recorded related to the explosion was, “I should have had them wait and not had them go on.” She recorded her related feeling to be guilt. Jill’s therapist used this worksheet as a starting point for engaging in Socratic dialogue, as shown in the following example:
 4 | 
 5 | Therapist: Jill, do you mind if I ask you a few questions about this thought that you noticed, “I should have had them wait and not had them go on?”
 6 | 
 7 | Client: Sure.
 8 | 
 9 | Therapist: Can you tell me what the protocol tells you to do in a situation in which a truck breaks down during a convoy?
10 | 
11 | Client: You want to get the truck repaired as soon as possible, because the point of a convoy is to keep the trucks moving so that you aren’t sitting ducks.
12 | 
13 | Therapist: The truck that broke down was the lead truck that you were on. What is the protocol in that case?
14 | 
15 | Client: The protocol says to wave the other trucks through and keep them moving so that you don’t have multiple trucks just sitting there together more vulnerable.
16 | 
17 | Therapist: Okay. That’s helpful for me to understand. In light of the protocol you just described and the reasons for it, why do you think you should have had the second truck wait and not had them go on?
18 | 
19 | Client: If I hadn’t have waved them through and told them to carry on, this wouldn’t have happened. It is my fault that they died. (Begins to cry)
20 | 
21 | Therapist: (Pause) It is certainly sad that they died. (Pause) However, I want us to think through the idea that you should have had them wait and not had them go on, and consequently that it was your fault. (Pause) If you think back about what you knew at the time — not what you know now 5 years after the outcome — did you see anything that looked like a possible explosive device when you were scanning the road as the original lead truck?
22 | 
23 | Client: No. Prior to the truck breaking down, there was nothing that we noticed. It was an area of Iraq that could be dangerous, but there hadn’t been much insurgent activity in the days and weeks prior to it happening.
24 | 
25 | Therapist: Okay. So, prior to the explosion, you hadn’t seen anything suspicious.
26 | 
27 | Client: No.
28 | 
29 | Therapist: When the second truck took over as the lead truck, what was their responsibility and what was your responsibility at that point?
30 | 
31 | Client: The next truck that Mike and my other friends were on essentially became the lead truck, and I was responsible for trying to get my truck moving again so that we weren’t in danger.
32 | 
33 | Therapist: Okay. In that scenario then, would it be Mike and the others’ jobs to be scanning the environment ahead for potential dangers?
34 | 
35 | Client: Yes, but I should have been able to see and warn them.
36 | 
37 | Therapist: Before we determine that, how far ahead of you were Mike and the others when the explosion occurred?
38 | 
39 | Client: Oh (pause), probably 200 yards?
40 | 
41 | Therapist: 200 yards—that’s two football fields’ worth of distance, right?
42 | 
43 | Client: Right.
44 | 
45 | Therapist: You’ll have to educate me. Are there explosive devices that you wouldn’t be able to detect 200 yards ahead?
46 | 
47 | Client: Absolutely.
48 | 
49 | Therapist: How about explosive devices that you might not see 10 yards ahead?
50 | 
51 | Client: Sure. If they are really good, you wouldn’t see them at all.
52 | 
53 | Therapist: So, in light of the facts that you didn’t see anything at the time when you waved them through at 200 yards behind and that they obviously didn’t see anything 10 yards ahead before they hit the explosion, and that protocol would call for you preventing another danger of being sitting ducks, help me understand why you wouldn’t have waved them through at that time? Again, based on what you knew at the time?
54 | 
55 | Client: (Quietly) I hadn’t thought about the fact that Mike and the others obviously didn’t see the device at 10 yards, as you say, or they would have probably done something else. (Pause) Also, when you say that we were trying to prevent another danger at the time of being “sitting ducks,” it makes me feel better about waving them through.
56 | 
57 | Therapist: Can you describe the type of emotion you have when you say, “It makes me feel better?”
58 | 
59 | Client: I guess I feel less guilty.
60 | 
61 | Therapist: That makes sense to me. As we go back and more accurately see the reality of what was really going on at the time of this explosion, it is important to notice that it makes you feel better emotionally. (Pause) In fact, I was wondering if you had ever considered that, in this situation, you actually did exactly what you were supposed to do and that something worse could have happened had you chosen to make them wait?
62 | 
63 | Client: No. I haven’t thought about that.
64 | 
65 | Therapist: Obviously this was an area that insurgents were active in if they were planting explosives. Is it possible that it could have gone down worse had you chosen not to follow protocol and send them through?
66 | 
67 | Client: Hmmm. I hadn’t thought about that either.
68 | 
69 | Therapist: That’s okay. Many people don’t think through what could have happened if they had chosen an alternative course of action at the time or they assume that there would have only been positive outcomes if they had done something different. I call it “happily ever after” thinking — assuming that a different action would have resulted in a positive outcome. (Pause) When you think, “I did a good job following protocol in a stressful situation that may have prevented more harm from happening,” how does that make you feel?
70 | 
71 | Client: It definitely makes me feel less guilty.
72 | 
73 | Therapist: I’m wondering if there is any pride that you might feel?
74 | 
75 | Client: Hmmm...I don’t know if I can go that far.
76 | 
77 | Therapist: What do you mean?
78 | 
79 | Client: It seems wrong to feel pride when my friends died.
80 | 
81 | Therapist: Is it possible to feel both pride and sadness in this situation? (Pause) Do you think Mike would hold it against you for feeling pride, as well as sadness for his and others’ losses?
82 | 
83 | Client: Mike wouldn’t hold it against me. In fact, he’d probably reassure me that I did a good job.
84 | 
85 | Therapist: (Pause) That seems really important for you to remember. It may be helpful to remind yourself of what you have discovered today, because you have some habits in thinking about this event in a particular way. We are also going to be doing some practice assignments that will help to walk you through your thoughts about what happened during this event, help you to remember what you knew at the time, and remind you how different thoughts can result in different feelings about what happened.
86 | 
87 | Client: I actually feel a bit better after this conversation. 
88 | 
89 | Another thought that Jill described in relation to the traumatic event was, “I should have seen the explosion was going to happen to prevent my friends from dying.” Her related feelings were guilt and self-directed anger. The therapist used this thought to introduce the cognitive intervention of "challenging thoughts" and provided a worksheet for practice. The therapist first provided education about the different types of thinking errors, including habitual thinking, all-or-none thinking, taking things out of context, overestimating probabilities, and emotional reasoning, as well as discussing other important factors, such as gathering evidence for and against the thought, evaluating the source of the information, and focusing on irrelevant factors. 
90 | 
91 | More specifically, Jill noted that she experienced 100 percent intensity of guilt and 75 percent intensity of anger at herself in relation to the thought "I should have seen the explosive device to prevent my friends from dying." She posed several challenging questions, including the notion that improvised explosive devices are meant to be concealed, that she is the source of the information (because others don't blame her), and that her feelings are not based on facts (i.e., she feels guilt and therefore must be guilty). She came up with the alternative thought, "The best explosive devices aren't seen and Mike (driver of the second truck) was a good soldier. If he saw something he would stopped or tried to evade it," which she rated as 90 percent confidence in believing. She consequently believed her original thought 10 percent, and re-rated her emotions as only 10 percent guilt and 5 percent anger at self.
92 | 
93 | 
94 | 


--------------------------------------------------------------------------------
/embeddings.index:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MenduDev/Mendu_Chatbot_prototype/eaec8a199ce84ddc4c8f69f49fb626bcf1200cd5/embeddings.index


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
 1 | print('... Loading System ...')
 2 | 
 3 | from dotenv import load_dotenv
 4 | import os
 5 | from openai import OpenAI
 6 | import faiss
 7 | from rag_context import retrieve_similar_sentences, load_data
 8 | 
 9 | gpt3 = 'gpt-3.5-turbo-0301'
10 | gpt4 = 'gpt-4-1106-preview'
11 | def ask(question: str, chat_log: list, model=gpt3, temp=0.9):
12 | 
13 |     if not chat_log:
14 |         messages = [{'role': 'system', 'content': system_updated},
15 |                     {'role': 'assistant', 'content': start_message}]
16 |     else:
17 |         # chat_log.append({'role': 'user', 'content': question})
18 |         messages = chat_log    
19 | 
20 |     # Retrieve similar sentences
21 |     _, similar_indices = retrieve_similar_sentences(question, index)
22 |     if similar_indices:
23 |         retrieval_content = ' '.join(
24 |             [f'{combined_infos[idx]}' for idx in similar_indices]
25 |         )
26 |         messages.append({'role': 'system', 'content': retrieval_content})
27 |     
28 |     messages.append({'role': 'user', 'content': question})
29 | 
30 |     response = client.chat.completions.create(
31 |         messages=messages,
32 |         model=model,
33 |         temperature=temp,
34 |     )
35 |         
36 |     answer = response.choices[0].message.content
37 | 
38 |     # remove the role system content from retrieval
39 |     if similar_indices:
40 |         messages.pop(-2)
41 |     
42 |     messages.append({'role': 'assistant', 'content': answer})
43 | 
44 |     # print(sum(len(message['content']) for message in messages))
45 | 
46 |     return str(answer), messages
47 | 
48 | 
if __name__ == '__main__':
    # Interactive entry point: load credentials and data, build the system
    # prompt, then run a read-answer loop on the terminal until the user quits.
    # The names assigned here (client, index, combined_infos, system_updated,
    # start_message) are module globals that `ask` reads.

    # NOTE(review): presumably silences the tokenizers parallelism warning from
    # the embedding model used in rag_context - confirm.
    os.environ["TOKENIZERS_PARALLELISM"] = "false"
    load_dotenv()
    openai_api_key = os.environ.get("OPENAI_API_KEY")
    if not openai_api_key:
        # Fail fast with an actionable message instead of a client error later.
        raise SystemExit(
            'OPENAI_API_KEY is not set. Add it to a .env file or the environment.'
        )
    client = OpenAI(api_key=openai_api_key)

    index_path = './embeddings.index'

    # load user data, guidelines and past conversations
    user_name = 'Jill'  # pull the user name from the database
    guideline_path = './data/Mental_Health_FAQ.csv'  # pull the guideline from the database
    past_conversation_path = "./data/transcript2"  # pull the past conversation from the database

    # Load the data
    index = faiss.read_index(index_path)
    user_background, combined_infos = load_data(guideline_path, past_conversation_path, user_name)
    print('... User data has been loaded successfully!')
    print('... Mendu Chat is ready to chat with you! Type "quit" to exit.')
    print('----------------------------------------------')

    # system definition and start message
    # Adjacent string literals are used instead of backslash continuations so
    # the source indentation does not end up inside the prompt as long runs of
    # spaces.
    system = (
        'You are a skillful and empathetic AI '
        'therapist named Mendu Chat who practices Cognitive Behavioral Therapy (CBT). '
        f'You help your patient {user_name} to identify and change '
        'the irrational thoughts and beliefs that are causing the suffering. '
        'You also help your patients to learn and practice coping skills to help the patients '
        'better manage their stress and anxiety.'
    )

    system_updated = f'{system} Information of your patient: {user_background[0]}'

    start_message = f'This is the Mendu Chat. How are you feeling today, {user_name}?'

    chat_log = []  # Initialize the chat_log
    print(start_message)

    while True:
        try:
            question = input("You: ")
        except (EOFError, KeyboardInterrupt):
            # Ctrl-D / Ctrl-C ends the session cleanly instead of a traceback.
            print()
            break

        question = question.strip()
        if not question:
            # Nothing to send; prompt again rather than calling the API.
            continue
        if question.lower() == 'quit':
            break

        answer, chat_log = ask(question, chat_log)
        print("Mendu Chat:", answer)
90 | 


--------------------------------------------------------------------------------
/notebooks/chatbot.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 20,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import os\n",
 10 |     "from dotenv import load_dotenv\n",
 11 |     "load_dotenv()\n",
 12 |     "import openai\n",
 13 |     "openai.api_key = os.getenv('OPENAI_API_KEY')\n",
 14 |     "openai.api_key_path = '../openai_api_key.txt'"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": 21,
 20 |    "metadata": {},
 21 |    "outputs": [],
 22 |    "source": [
 23 |     "completion = openai.Completion()"
 24 |    ]
 25 |   },
 26 |   {
 27 |    "cell_type": "code",
 28 |    "execution_count": 22,
 29 |    "metadata": {},
 30 |    "outputs": [],
 31 |    "source": [
 32 |     "chat_log = ('The following is a conversation with the AI therapist named Joy and a patient. '\n",
 33 |     "'JOY is compasionate, insightful, and empathetic. She offers adives for coping with the user\\'s problem. '\n",
 34 |     "'Her objective is to make the user feel better by feeling heard. '\n",
 35 |     "'Sometimes the user will want to end the conversation, and Joy will respect that.')"
 36 |    ]
 37 |   },
 38 |   {
 39 |    "cell_type": "code",
 40 |    "execution_count": 23,
 41 |    "metadata": {},
 42 |    "outputs": [
 43 |     {
 44 |      "name": "stdout",
 45 |      "output_type": "stream",
 46 |      "text": [
 47 |       "The following is a conversation with the AI therapist named Joy and a patient. JOY is compasionate, insightful, and empathetic. She offers adives for coping with the user's problem. Her objective is to make the user feel better by feeling heard. Sometimes the user will want to end the conversation, and Joy will respect that.\n"
 48 |      ]
 49 |     }
 50 |    ],
 51 |    "source": [
 52 |     "print(start_chat_log)"
 53 |    ]
 54 |   },
 55 |   {
 56 |    "cell_type": "code",
 57 |    "execution_count": 31,
 58 |    "metadata": {},
 59 |    "outputs": [],
 60 |    "source": [
 61 |     "start_sequence = \"\\nJoy:\"\n",
 62 |     "restart_sequence = \"\\n\\nPatient:\"\n",
 63 |     "  \n",
 64 |     "def ask(question: str, chat_log: str) -> str:\n",
 65 |     "\n",
 66 |     "  # prompt = f'{chat_log}/n{question}'\n",
 67 |     "  prompt = f'{chat_log}{restart_sequence} {question}{start_sequence}'\n",
 68 |     "\n",
 69 |     "  response = completion.create(\n",
 70 |     "      prompt = prompt,\n",
 71 |     "      model = \"text-davinci-003\",\n",
 72 |     "      stop = [\"Patient:\",'Joy:','Patient','Joy'],\n",
 73 |     "      temperature = 0.6, #the higher the more creative\n",
 74 |     "      frequency_penalty = 0.3, #prevents word repetition, larger -> higher penalty\n",
 75 |     "      presence_penalty = 0.6, #prevents topic repetition, larger -> higher penalty\n",
 76 |     "      top_p =1, \n",
 77 |     "      best_of=1,\n",
 78 |     "      max_tokens=170\n",
 79 |     "  ) \n",
 80 |     "  \n",
 81 |     "  answer = response.choices[0].text.strip()\n",
 82 |     "  chat_log = f'{prompt}{answer}'\n",
 83 |     "  return str(answer), str(chat_log)\n",
 84 |     "\n",
 85 |     "def chatlog(chat_log: str, restart_sequence:str, question: str, start_sequence:str, answer: str) -> str:\n",
 86 |     "  chat_log = f'{chat_log}{restart_sequence} {question}{start_sequence}{answer}'\n",
 87 |     "  return str(chat_log)"
 88 |    ]
 89 |   },
 90 |   {
 91 |    "cell_type": "code",
 92 |    "execution_count": 32,
 93 |    "metadata": {},
 94 |    "outputs": [
 95 |     {
 96 |      "data": {
 97 |       "text/plain": [
 98 |        "\"The following is a conversation with the AI therapist named Joy and a patient. JOY is compasionate, insightful, and empathetic. She offers adives for coping with the user's problem. Her objective is to make the user feel better by feeling heard. Sometimes the user will want to end the conversation, and Joy will respect that.\\n\\nPatient: I am Joe, I am feeling stressed out\\nJoy:Hi Joe, it sounds like you are feeling a lot of stress. Can you tell me more about what is going on?\""
 99 |       ]
100 |      },
101 |      "execution_count": 32,
102 |      "metadata": {},
103 |      "output_type": "execute_result"
104 |     }
105 |    ],
106 |    "source": [
107 |     "chat_log"
108 |    ]
109 |   },
110 |   {
111 |    "cell_type": "code",
112 |    "execution_count": 29,
113 |    "metadata": {},
114 |    "outputs": [
115 |     {
116 |      "ename": "TypeError",
117 |      "evalue": "can only concatenate tuple (not \"str\") to tuple",
118 |      "output_type": "error",
119 |      "traceback": [
120 |       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
121 |       "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
122 |       "Cell \u001b[0;32mIn[29], line 2\u001b[0m\n\u001b[1;32m      1\u001b[0m answer \u001b[39m=\u001b[39m ask(\u001b[39m\"\u001b[39m\u001b[39mI am Joe, I am feeling stressed out\u001b[39m\u001b[39m\"\u001b[39m, chat_log)\n\u001b[0;32m----> 2\u001b[0m \u001b[39mprint\u001b[39m(answer\u001b[39m+\u001b[39;49m\u001b[39m'\u001b[39;49m\u001b[39m\\n\u001b[39;49;00m\u001b[39m\\n\u001b[39;49;00m\u001b[39m'\u001b[39;49m)\n\u001b[1;32m      3\u001b[0m chat_log \u001b[39m=\u001b[39m chatlog (\u001b[39m\"\u001b[39m\u001b[39mI am Joe, I am feeling stressed out\u001b[39m\u001b[39m\"\u001b[39m, chat_log)\n",
123 |       "\u001b[0;31mTypeError\u001b[0m: can only concatenate tuple (not \"str\") to tuple"
124 |      ]
125 |     }
126 |    ],
127 |    "source": [
128 |     "answer = ask(\"I am Joe, I am feeling stressed out\", chat_log)\n",
129 |     "print(answer+'\\n\\n')\n",
130 |     "chat_log = chatlog(chat_log, restart_sequence, \"I am Joe, I am feeling stressed out\", start_sequence, answer[0])"
131 |    ]
132 |   },
133 |   {
134 |    "cell_type": "code",
135 |    "execution_count": 26,
136 |    "metadata": {},
137 |    "outputs": [
138 |     {
139 |      "data": {
140 |       "text/plain": [
141 |        "(\"I can imagine that can be very frustrating. It sounds like you are putting in a lot of effort into studying, but it's not having the result you want. What strategies have you been using to try and improve your grades?\",\n",
142 |        " \"The following is a conversation with the AI therapist named Joy and a patient. JOY is compasionate, insightful, and empathetic. She offers adives for coping with the user's problem. Her objective is to make the user feel better by feeling heard. Sometimes the user will want to end the conversation, and Joy will respect that.\\n\\nPatient: I am Joe, I am feeling stressed out\\nJoy:Hi Joe, it sounds like you are feeling a lot of stress. Can you tell me more about what is going on?\\n\\nPatient: i have to study so much and my grade keep dropping even after studying so much\\nJoy:I can imagine that can be very frustrating. It sounds like you are putting in a lot of effort into studying, but it's not having the result you want. What strategies have you been using to try and improve your grades?\")"
143 |       ]
144 |      },
145 |      "execution_count": 26,
146 |      "metadata": {},
147 |      "output_type": "execute_result"
148 |     }
149 |    ],
150 |    "source": [
151 |     "ask('i have to study so much and my grade keep dropping even after studying so much', chat_log)"
152 |    ]
153 |   },
154 |   {
155 |    "cell_type": "code",
156 |    "execution_count": 27,
157 |    "metadata": {},
158 |    "outputs": [
159 |     {
160 |      "data": {
161 |       "text/plain": [
162 |        "('Your name is Joe. Can you tell me more about why you are feeling so stressed?',\n",
163 |        " \"The following is a conversation with the AI therapist named Joy and a patient. JOY is compasionate, insightful, and empathetic. She offers adives for coping with the user's problem. Her objective is to make the user feel better by feeling heard. Sometimes the user will want to end the conversation, and Joy will respect that.\\n\\nPatient: I am Joe, I am feeling stressed out\\nJoy:Hi Joe, it sounds like you are feeling a lot of stress. Can you tell me more about what is going on?\\n\\nPatient: I dont know. Btw, what is my name again?\\nJoy:Your name is Joe. Can you tell me more about why you are feeling so stressed?\")"
164 |       ]
165 |      },
166 |      "execution_count": 27,
167 |      "metadata": {},
168 |      "output_type": "execute_result"
169 |     }
170 |    ],
171 |    "source": [
172 |     "ask('I dont know. Btw, what is my name again?', chat_log)"
173 |    ]
174 |   },
175 |   {
176 |    "cell_type": "code",
177 |    "execution_count": 131,
178 |    "metadata": {},
179 |    "outputs": [
180 |     {
181 |      "data": {
182 |       "text/plain": [
183 |        "(\"It sounds like you're feeling overwhelmed by the pressures of school and homework. Have you been feeling this way for a while or is it something new?\",\n",
184 |        " \"The following is a conversation with the AI therapist named Joy and a patient. JOY is compasionate, insightful, and empathetic. She offers adives for coping with the user's problem. Her objective is to make the user feel better by feeling heard. Sometimes the user will want to end the conversation, and Joy will respect that.\\n\\nPatient:I am feeling stressed out\\nJoy:I understand that you're feeling stressed out. What do you think is causing you to feel so stressed?\\n\\nPatient:School and homework\\nJoy:It sounds like you're feeling overwhelmed by the pressures of school and homework. Have you been feeling this way for a while or is it something new?\")"
185 |       ]
186 |      },
187 |      "execution_count": 131,
188 |      "metadata": {},
189 |      "output_type": "execute_result"
190 |     }
191 |    ],
192 |    "source": [
193 |     "ask('School and homework', chat_log)"
194 |    ]
195 |   },
196 |   {
197 |    "cell_type": "code",
198 |    "execution_count": 132,
199 |    "metadata": {},
200 |    "outputs": [
201 |     {
202 |      "data": {
203 |       "text/plain": [
204 |        "(\"It sounds like you've been feeling stressed for a while now. What do you think has been the source of this stress?\",\n",
205 |        " \"The following is a conversation with the AI therapist named Joy and a patient. JOY is compasionate, insightful, and empathetic. She offers adives for coping with the user's problem. Her objective is to make the user feel better by feeling heard. Sometimes the user will want to end the conversation, and Joy will respect that.\\n\\nPatient:I am feeling stressed out\\nJoy:I understand that you're feeling stressed out. What do you think is causing you to feel so stressed?\\n\\nPatient:For awhile now\\nJoy:It sounds like you've been feeling stressed for a while now. What do you think has been the source of this stress?\")"
206 |       ]
207 |      },
208 |      "execution_count": 132,
209 |      "metadata": {},
210 |      "output_type": "execute_result"
211 |     }
212 |    ],
213 |    "source": [
214 |     "ask('For awhile now', chat_log)"
215 |    ]
216 |   },
217 |   {
218 |    "cell_type": "code",
219 |    "execution_count": 133,
220 |    "metadata": {},
221 |    "outputs": [
222 |     {
223 |      "data": {
224 |       "text/plain": [
225 |        "('I can understand how that could be stressful. Is there anything specific that you are doing to try and improve your grades?',\n",
226 |        " \"The following is a conversation with the AI therapist named Joy and a patient. JOY is compasionate, insightful, and empathetic. She offers adives for coping with the user's problem. Her objective is to make the user feel better by feeling heard. Sometimes the user will want to end the conversation, and Joy will respect that.\\n\\nPatient:I am feeling stressed out\\nJoy:I understand that you're feeling stressed out. What do you think is causing you to feel so stressed?\\n\\nPatient:My grades are dropping\\nJoy:I can understand how that could be stressful. Is there anything specific that you are doing to try and improve your grades?\")"
227 |       ]
228 |      },
229 |      "execution_count": 133,
230 |      "metadata": {},
231 |      "output_type": "execute_result"
232 |     }
233 |    ],
234 |    "source": [
235 |     "ask('My grades are dropping', chat_log)"
236 |    ]
237 |   },
238 |   {
239 |    "cell_type": "code",
240 |    "execution_count": 100,
241 |    "metadata": {},
242 |    "outputs": [
243 |     {
244 |      "data": {
245 |       "text/plain": [
246 |        "('yes',\n",
247 |        " \"The following is a conversation with a therapist and a patient. The therapist is AI chatbot named JOY, who is compasionate, insightful, and empathetic. Her objective is to make the user feel better by feeling heard. Joy offers suggestions for coping with the user's problem and will ask if the user would like to talk about them.Sometimes the user will want to end the conversation, and Joy will respect that.\\n\\nPatient:I am feeling stressed out\\nJoy:I am sorry to hear that. I wonder if you could tell me more about what is going on for you?\\n\\nPatient:for the past year\\nJoy:yes\")"
248 |       ]
249 |      },
250 |      "execution_count": 100,
251 |      "metadata": {},
252 |      "output_type": "execute_result"
253 |     }
254 |    ],
255 |    "source": [
256 |     "ask('for the past year', chat_log)"
257 |    ]
258 |   }
259 |  ],
260 |  "metadata": {
261 |   "kernelspec": {
262 |    "display_name": "chatbot",
263 |    "language": "python",
264 |    "name": "python3"
265 |   },
266 |   "language_info": {
267 |    "codemirror_mode": {
268 |     "name": "ipython",
269 |     "version": 3
270 |    },
271 |    "file_extension": ".py",
272 |    "mimetype": "text/x-python",
273 |    "name": "python",
274 |    "nbconvert_exporter": "python",
275 |    "pygments_lexer": "ipython3",
276 |    "version": "3.8.15"
277 |   },
278 |   "orig_nbformat": 4,
279 |   "vscode": {
280 |    "interpreter": {
281 |     "hash": "9798c78c52b861f9442ea63a21901b586ae2f2169fa92d94b4091cc5bab62e04"
282 |    }
283 |   }
284 |  },
285 |  "nbformat": 4,
286 |  "nbformat_minor": 2
287 | }
288 | 


--------------------------------------------------------------------------------
/notebooks/hope_notebook.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 162,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import pandas as pd\n",
 10 |     "from os import listdir\n",
 11 |     "from os.path import isfile, join\n"
 12 |    ]
 13 |   },
 14 |   {
 15 |    "cell_type": "code",
 16 |    "execution_count": 163,
 17 |    "metadata": {},
 18 |    "outputs": [],
 19 |    "source": [
 20 |     "# set path and load data\n",
 21 |     "path = '/Users/lsacy/data/HOPE_WSDM_2022/Train/'\n",
 22 |     "path2 = '/Users/lsacy/data/HOPE_WSDM_2022/Test/'\n",
 23 |     "path3 = '/Users/lsacy/data/HOPE_WSDM_2022/Validation/'\n",
 24 |     "\n",
 25 |     "\n",
 26 |     "files = [f for f in listdir(path) if isfile(join(path, f))]\n",
 27 |     "files2 = [f for f in listdir(path2) if isfile(join(path2, f))]\n",
 28 |     "files3 = [f for f in listdir(path3) if isfile(join(path3, f))]\n",
 29 |     "\n",
 30 |     "df_list = []\n",
 31 |     "for file in files:\n",
 32 |     "    df = pd.read_csv(path+file)\n",
 33 |     "    df_list.append(df)\n",
 34 |     "\n",
 35 |     "for file in files2:\n",
 36 |     "    df = pd.read_csv(path2+file)\n",
 37 |     "    df_list.append(df)\n",
 38 |     "\n",
 39 |     "for file in files3:\n",
 40 |     "    df= pd.read_csv(path3+file)\n",
 41 |     "    df_list.append(df)\n",
 42 |     "    "
 43 |    ]
 44 |   },
 45 |   {
 46 |    "cell_type": "code",
 47 |    "execution_count": 167,
 48 |    "metadata": {},
 49 |    "outputs": [],
 50 |    "source": [
 51 |     "# do basic cleaning (removing NaNs, etc.)\n",
 52 |     "\n",
 53 |     "# remove 'Unnamed: 0' columns\n",
 54 |     "for df in df_list:\n",
 55 |     "    try: \n",
 56 |     "        df.drop(['Unnamed: 0'], axis=1, inplace=True)\n",
 57 |     "    except:\n",
 58 |     "        pass\n",
 59 |     "\n",
 60 |     "for df in df_list:\n",
 61 |     "    try: \n",
 62 |     "        df.drop(['Unnamed: 0.1'], axis=1, inplace=True)\n",
 63 |     "    except:\n",
 64 |     "        pass\n",
 65 |     "\n",
 66 |     "for df in df_list:\n",
 67 |     "    try: \n",
 68 |     "        df.drop(['Unnamed: 0.1.1'], axis=1, inplace=True)\n",
 69 |     "    except:\n",
 70 |     "        pass\n",
 71 |     "\n",
 72 |     "# find 'Type' == NaN\n",
 73 |     "for i in range(len(df_list)):\n",
 74 |     "    if df_list[i]['Type'].isnull().any():\n",
 75 |     "        print(i)\n",
 76 |     "        \n",
 77 |     "# show the rows that have a NaN value in 'Type'\n",
 78 |     "for i in range(len(df_list)):\n",
 79 |     "    if df_list[i]['Type'].isnull().any():\n",
 80 |     "        print(df_list[i][df_list[i]['Type'].isnull()])\n",
 81 |     "\n",
 82 |     "# Set the type of the row with NaN value to 'T'\n",
 83 |     "for i in range(len(df_list)):\n",
 84 |     "    if df_list[i]['Type'].isnull().any():\n",
 85 |     "        df_list[i].loc[df_list[i]['Type'].isnull(), 'Type'] = 'T'\n",
 86 |     "        "
 87 |    ]
 88 |   },
 89 |   {
 90 |    "cell_type": "code",
 91 |    "execution_count": 224,
 92 |    "metadata": {},
 93 |    "outputs": [],
 94 |    "source": [
 95 |     "# to get the data to prompt - completion format, we need to check for \n",
 96 |     "# continuity of conversation using i == i.shift(1), print the index of the row, and i\n",
 97 |     "for i in range(len(df_list)):\n",
 98 |     "    # check whether the values of the 'Type' column in df_list[i] alternate between T and P on every row\n",
 99 |     "    # if not, print i (the position of that dataframe in df_list)\n",
100 |     "    if (df_list[i]['Type'] == df_list[i]['Type'].shift(1)).any():\n",
101 |     "        print(i)\n",
102 |     "\n",
103 |     "# add a column 'key' to each df in df_list: consecutive rows where 'Type' does not switch between T and P share the same key\n",
104 |     "for i in range(len(df_list)):\n",
105 |     "    df_list[i]['key'] = (df_list[i]['Type'] != df_list[i]['Type'].shift(1)).astype(int).cumsum()\n",
106 |     "\n",
107 |     "# create a new df_list2, only keep the columns ['key', 'Type', 'Utterance']\n",
108 |     "df_list2 = []\n",
109 |     "for i in range(len(df_list)):\n",
110 |     "    df_temp = df_list[i][['key', 'Type', 'Utterance']]\n",
111 |     "    df_list2.append(df_temp)\n",
112 |     "    \n",
113 |     "# apply groupby to each df in df_list to get the data in prompt - completion format\n",
114 |     "for i in range(len(df_list2)):\n",
115 |     "    df_list2[i] = df_list2[i].groupby(['key', 'Type'])['Utterance'].apply(' '.join)"
116 |    ]
117 |   },
118 |   {
119 |    "cell_type": "code",
120 |    "execution_count": 233,
121 |    "metadata": {},
122 |    "outputs": [],
123 |    "source": [
124 |     "# convert series to frame\n",
125 |     "for i in range(len(df_list2)):\n",
126 |     "    df_list2[i] = df_list2[i].to_frame()\n",
127 |     "# reset the index so that 'key' and 'Type' become regular columns again\n",
128 |     "for i in range(len(df_list2)):\n",
129 |     "    df_list2[i].reset_index(inplace=True)"
130 |    ]
131 |   },
132 |   {
133 |    "cell_type": "code",
134 |    "execution_count": 270,
135 |    "metadata": {},
136 |    "outputs": [],
137 |    "source": [
138 |     "# check again for occurrences where the type does not switch\n",
139 |     "for i in range(len(df_list2)):\n",
140 |     "    # check whether the values of the 'Type' column in df_list2[i] alternate between T and P on every row\n",
141 |     "    # if not, print i (the position of that dataframe in df_list2)\n",
142 |     "    if (df_list2[i]['Type'] == df_list2[i]['Type'].shift(1)).any():\n",
143 |     "        print(i)\n",
144 |     "        "
145 |    ]
146 |   },
147 |   {
148 |    "cell_type": "code",
149 |    "execution_count": 239,
150 |    "metadata": {},
151 |    "outputs": [],
152 |    "source": [
153 |     "# save list of patient start index\n",
154 |     "patient_start=[]\n",
155 |     "for i, df in enumerate(df_list2):\n",
156 |     "    if df.iloc[0]['Type'] == 'P':\n",
157 |     "        patient_start.append(i)\n",
158 |     "\n",
159 |     "# save list of therapist start index\n",
160 |     "therapist_start = [i for i in range(len(df_list2)) if i not in patient_start]\n",
161 |     "\n",
162 |     "# create a new temporary dataframe with 2 columns: prompt and completion\n",
163 |     "df_temp = pd.DataFrame(columns=['prompt', 'completion'])\n",
164 |     "\n",
165 |     "# create a list of dataframes, each dataframe is a conversation\n",
166 |     "df_list3 = []\n",
167 |     "for i in therapist_start:\n",
168 |     "    df_temp = pd.DataFrame(columns=['prompt', 'completion'])\n",
169 |     "    for row in range(len(df_list2[i])):\n",
170 |     "        if df_list2[i]['Type'][row] == 'P' and (len(df_list2[i]) - 1) >= row+1:\n",
171 |     "            print(i, row)\n",
172 |     "            df_temp = df_temp.append({'prompt': df_list2[i]['Utterance'][row], 'completion': df_list2[i]['Utterance'][row+1]}, ignore_index=True)\n",
173 |     "    df_list3.append(df_temp)\n",
174 |     "\n",
175 |     "for i in patient_start:\n",
176 |     "    df_temp = pd.DataFrame(columns=['prompt', 'completion'])\n",
177 |     "    for row in range(len(df_list2[i])):\n",
178 |     "        if df_list2[i]['Type'][row] == 'P' and (len(df_list2[i]) - 1) >= row+1:\n",
179 |     "            print(i, row)\n",
180 |     "            df_temp = df_temp.append({'prompt': df_list2[i]['Utterance'][row], 'completion': df_list2[i]['Utterance'][row+1]}, ignore_index=True)\n",
181 |     "    df_list3.append(df_temp)"
182 |    ]
183 |   },
184 |   {
185 |    "cell_type": "code",
186 |    "execution_count": 332,
187 |    "metadata": {},
188 |    "outputs": [
189 |     {
190 |      "data": {
191 |       "text/plain": [
192 |        "array(['Im here',\n",
193 |        "       \"So I've been made to come in and talk to you. because something happened on the bus. And there I may have said something to somebody and they're just like, oh, you can't say that. So they actually said, we're going to take you to jail. from jail. They just called me here.\",\n",
194 |        "       \"They didn't explain anything. The government never explains anything. They just don't want me to understand what's going on because they just want to keep me out of captivity.\",\n",
195 |        "       \"nope, it's the entire government, entire government, higher government.\",\n",
196 |        "       \"all my life. I've done this all my life. They they're always listening. They're always around. They're always there.\",\n",
197 |        "       \"always there. Like, for example, they're in the lights right now. They're listening to everything that we're talking about. So that's the reason why I really can't talk about what happened on the bus because they're just going to use that against and then it's locked me up again. I think we take more minutes.\",\n",
198 |        "       \"Yes, I take the medication, because if I don't take the medication, government comes and takes me away and puts me into a hospital where they force me the medication. And then the lights are always on and they can always hear everything that I'm saying and what I'm thinking and what I'm going to do.\",\n",
199 |        "       \"wanting us to take over the world, to take over the world, people in the world, of course, they want to use everything and use everything that everybody knows to use it against them so that they can actually have the world they want in that baby and make everybody pretend that they don't know what's going on. So therefore, they're using everybody's thoughts against themselves, so that the government can use it to their benefit.\",\n",
200 |        "       \"No. So when I'm home, I think I'm perfectly fine because I set up my house in my room to actually make it government proof so that nobody can actually hear see what's going on in there. So when I'm home, I'm perfectly fine.\",\n",
201 |        "       \"being home and being home I've saved my mom saved my brother's save everybody safe. And being out in the world in the world. That's a nobody safe.\",\n",
202 |        "       \"No, I told you they are employed, and they can hear everything that we're saying right now. So no, we're not safe here.\",\n",
203 |        "       'at home and safe in my room, that I mean government.',\n",
204 |        "       \"I would stay home all the time, but I can't stay home because I mean to go and see doctors and talk to people and do the things that the government needs me or wants me to do. But I don't want to do those things because she's in my thoughts and they're trying to use everybody and get sleep. I can't deal with that.\",\n",
205 |        "       \"Everybody knows that they're in the light. It's common knowledge. It's common knowledge. Everybody knows that.\",\n",
206 |        "       \"No, of course, everybody hears the lights talking to you. I don't know what you're talking about. Like everybody hears it. Sometimes it's a whisper. For some people. Sometimes it's a yell. And if you want to not pay attention to what the lights are saying to you, then I don't know what's wrong with you, but everybody can hear it.\",\n",
207 |        "       'It sounds like a robot talking. It sounds like we need to know this information we need to what do you do.',\n",
208 |        "       'Yes. where your notes gonna go?',\n",
209 |        "       'who has access to the chart room.',\n",
210 |        "       \"Can I get a copy of your notes after you're done?\",\n",
211 |        "       \"Yes, because I might trust you. And I don't know if I do yet. But I don't trust the other people who work here because I don't know if they're working for the government, which they probably are working for the government. So they can actually have access to all of my notes and the things that you're writing down, which I don't know what they are, because you're not going to give me a copy of it yet. And I just don't trust that they can actually get that information and because they're going to use it to their advantage and they're going to give it to the government, which I can't have that. So could you please not take notes?\",\n",
212 |        "       \"Well, because I made my room at home government proof, I think they're going to try to steal that technology when I actually create it so that they can then disrupt my room and then everybody else's rooms who are just like mine so that they can then use their mind control to then control me to do the things that they want me to do.\",\n",
213 |        "       'Correct. Without my room, and without the technology and the systems and the things that I created, then the government will be able to control everything that I do.',\n",
214 |        "       'I feel fine',\n",
215 |        "       \"I don't really have any problem Yeah, I just I feel fine. I don't feel happy. I don't feel sad. I just, I'm just here\",\n",
216 |        "       \"well, the government's gonna do what they're gonna do anyway. So it doesn't matter if I'm happy sad, pissed off angry. any of that the government's going to make me do what they want me to do anyway. So it doesn't matter how I feel.\",\n",
217 |        "       \"No, because I've learned my lesson to not say those things out loud into to talk about what the government's doing because I can't actually inform anybody else because nobody's listening to me because They're already controlled by the government because they don't have a room like I do at home.\",\n",
218 |        "       \"While people don't understand what's going on, then that's their fault. that's their problem. They should have listened to me. And I could actually gave them the technology that they needed to actually create the room so that the government can't hear them.\",\n",
219 |        "       'No. Quite the opposite. It is completely opposite.',\n",
220 |        "       \"Okay. Yeah, we live with my mom. She does that she's supposed to she doesn't go in my room because she knows that. She goes into the robots gonna mess up the technology so she stays out.\",\n",
221 |        "       'I talk to people all the time, but they are in my head. We have conversations all the time about how we can actually make the world a better place and then how we can actually go through and then destroy the government or do good. Just pump it up.',\n",
222 |        "       'that would be good', 'Three days works.'], dtype=object)"
223 |       ]
224 |      },
225 |      "execution_count": 332,
226 |      "metadata": {},
227 |      "output_type": "execute_result"
228 |     }
229 |    ],
230 |    "source": [
231 |     "df_list3[110]['prompt'].values"
232 |    ]
233 |   },
234 |   {
235 |    "cell_type": "code",
236 |    "execution_count": 311,
237 |    "metadata": {},
238 |    "outputs": [],
239 |    "source": [
240 |     "df_final = pd.DataFrame(columns=['prompt', 'completion'])"
241 |    ]
242 |   },
243 |   {
244 |    "cell_type": "code",
245 |    "execution_count": 318,
246 |    "metadata": {},
247 |    "outputs": [
248 |     {
249 |      "name": "stderr",
250 |      "output_type": "stream",
251 |      "text": [
252 |       "/var/folders/m_/_mwt902x1z595k65b7qf743m0000gn/T/ipykernel_82735/1421327753.py:2: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
253 |       "  df_final = df_final.append(i, ignore_index=True)\n"
254 |      ]
255 |     }
256 |    ],
257 |    "source": [
258 |     "for i in df_list3:\n",
259 |     "    df_final = df_final.append(i, ignore_index=True)"
260 |    ]
261 |   },
262 |   {
263 |    "cell_type": "code",
264 |    "execution_count": 319,
265 |    "metadata": {},
266 |    "outputs": [
267 |     {
268 |      "data": {
269 |       "text/html": [
270 |        "<div>\n",
271 |        "<style scoped>\n",
272 |        "    .dataframe tbody tr th:only-of-type {\n",
273 |        "        vertical-align: middle;\n",
274 |        "    }\n",
275 |        "\n",
276 |        "    .dataframe tbody tr th {\n",
277 |        "        vertical-align: top;\n",
278 |        "    }\n",
279 |        "\n",
280 |        "    .dataframe thead th {\n",
281 |        "        text-align: right;\n",
282 |        "    }\n",
283 |        "</style>\n",
284 |        "<table border=\"1\" class=\"dataframe\">\n",
285 |        "  <thead>\n",
286 |        "    <tr style=\"text-align: right;\">\n",
287 |        "      <th></th>\n",
288 |        "      <th>prompt</th>\n",
289 |        "      <th>completion</th>\n",
290 |        "    </tr>\n",
291 |        "  </thead>\n",
292 |        "  <tbody>\n",
293 |        "    <tr>\n",
294 |        "      <th>0</th>\n",
295 |        "      <td>Good, good.</td>\n",
296 |        "      <td>It's good to see you. Yeah.</td>\n",
297 |        "    </tr>\n",
298 |        "    <tr>\n",
299 |        "      <th>1</th>\n",
300 |        "      <td>Yeah, I mean, good week with the roommates. I ...</td>\n",
301 |        "      <td>Silence. Terrific. So what was what specifical...</td>\n",
302 |        "    </tr>\n",
303 |        "    <tr>\n",
304 |        "      <th>2</th>\n",
305 |        "      <td>is just telling her that like, my studies are ...</td>\n",
306 |        "      <td>right. Right. Right. Because I think that's re...</td>\n",
307 |        "    </tr>\n",
308 |        "    <tr>\n",
309 |        "      <th>3</th>\n",
310 |        "      <td>So I think being a little way direct was helpful.</td>\n",
311 |        "      <td>It's really nice, I think to you've been able ...</td>\n",
312 |        "    </tr>\n",
313 |        "    <tr>\n",
314 |        "      <th>4</th>\n",
315 |        "      <td>At home for two days, and it was with my mom a...</td>\n",
316 |        "      <td>okay. So she's still sixers. Did she have a bi...</td>\n",
317 |        "    </tr>\n",
318 |        "    <tr>\n",
319 |        "      <th>...</th>\n",
320 |        "      <td>...</td>\n",
321 |        "      <td>...</td>\n",
322 |        "    </tr>\n",
323 |        "    <tr>\n",
324 |        "      <th>6439</th>\n",
325 |        "      <td>I actually came across a program, which is per...</td>\n",
326 |        "      <td>So the program that you're interested, you wan...</td>\n",
327 |        "    </tr>\n",
328 |        "    <tr>\n",
329 |        "      <th>6440</th>\n",
330 |        "      <td>I want you to see if, if, if I could choose th...</td>\n",
331 |        "      <td>Okay, well looking at my schedule, it probably...</td>\n",
332 |        "    </tr>\n",
333 |        "    <tr>\n",
334 |        "      <th>6441</th>\n",
335 |        "      <td>Right? I will. My daughter is 17 years old, ok...</td>\n",
336 |        "      <td>So you're looking, you're looking for a day pr...</td>\n",
337 |        "    </tr>\n",
338 |        "    <tr>\n",
339 |        "      <th>6442</th>\n",
340 |        "      <td>Right. Right. And, I mean, I was looking for a...</td>\n",
341 |        "      <td>You while you were there, it was difficult. Yo...</td>\n",
342 |        "    </tr>\n",
343 |        "    <tr>\n",
344 |        "      <th>6443</th>\n",
345 |        "      <td>no, she did not she. She just there just wasn'...</td>\n",
346 |        "      <td>And so so just there wasn't when when you visi...</td>\n",
347 |        "    </tr>\n",
348 |        "  </tbody>\n",
349 |        "</table>\n",
350 |        "<p>6444 rows × 2 columns</p>\n",
351 |        "</div>"
352 |       ],
353 |       "text/plain": [
354 |        "                                                 prompt  \\\n",
355 |        "0                                          Good, good.    \n",
356 |        "1     Yeah, I mean, good week with the roommates. I ...   \n",
357 |        "2     is just telling her that like, my studies are ...   \n",
358 |        "3     So I think being a little way direct was helpful.   \n",
359 |        "4     At home for two days, and it was with my mom a...   \n",
360 |        "...                                                 ...   \n",
361 |        "6439  I actually came across a program, which is per...   \n",
362 |        "6440  I want you to see if, if, if I could choose th...   \n",
363 |        "6441  Right? I will. My daughter is 17 years old, ok...   \n",
364 |        "6442  Right. Right. And, I mean, I was looking for a...   \n",
365 |        "6443  no, she did not she. She just there just wasn'...   \n",
366 |        "\n",
367 |        "                                             completion  \n",
368 |        "0                           It's good to see you. Yeah.  \n",
369 |        "1     Silence. Terrific. So what was what specifical...  \n",
370 |        "2     right. Right. Right. Because I think that's re...  \n",
371 |        "3     It's really nice, I think to you've been able ...  \n",
372 |        "4     okay. So she's still sixers. Did she have a bi...  \n",
373 |        "...                                                 ...  \n",
374 |        "6439  So the program that you're interested, you wan...  \n",
375 |        "6440  Okay, well looking at my schedule, it probably...  \n",
376 |        "6441  So you're looking, you're looking for a day pr...  \n",
377 |        "6442  You while you were there, it was difficult. Yo...  \n",
378 |        "6443  And so so just there wasn't when when you visi...  \n",
379 |        "\n",
380 |        "[6444 rows x 2 columns]"
381 |       ]
382 |      },
383 |      "execution_count": 319,
384 |      "metadata": {},
385 |      "output_type": "execute_result"
386 |     }
387 |    ],
388 |    "source": [
389 |     "df_final"
390 |    ]
391 |   },
392 |   {
393 |    "cell_type": "code",
394 |    "execution_count": 325,
395 |    "metadata": {},
396 |    "outputs": [],
397 |    "source": [
398 |     "output = []\n",
399 |     "for i in df_final.values:\n",
400 |     "    prompt = i[0].replace('â€™', '\\'').replace('â€œ', '\\\"').replace('â€', '\\\"').replace('â€˜', '\\'').replace('â€¦', '...').replace('\\xa0', ' ').replace('\\n', ' ').replace('\\r', ' ').replace('\\t', ' ')\n",
401 |     "    completion = i[1].replace('\\xa0', ' ').replace('â€™', '\\'').replace('â€œ', '\\\"').replace('â€', '\\\"').replace('â€˜', '\\'').replace('â€¦', '...').replace('\\n', ' ').replace('\\r', ' ').replace('\\t', ' ')\n",
402 |     "    line = {'prompt': prompt, 'completion': completion}\n",
403 |     "    output.append(line)"
404 |    ]
405 |   },
406 |   {
407 |    "cell_type": "code",
408 |    "execution_count": 331,
409 |    "metadata": {},
410 |    "outputs": [],
411 |    "source": [
412 |     "import json\n",
413 |     "with open('data/hope.jsonl', 'w') as outfile:\n",
414 |     "        for i in output:\n",
415 |     "            json.dump(i, outfile)\n",
416 |     "            outfile.write('\\n')"
417 |    ]
418 |   },
419 |   {
420 |    "cell_type": "code",
421 |    "execution_count": 9,
422 |    "metadata": {},
423 |    "outputs": [],
424 |    "source": [
425 |     "len_test = ['[Instruction] Act as a friendly, compasionate, insightful, and empathetic AI therapist named Joy. Joy listens, asks for details and offers detailed advices once a while.  End the conversation when you wishes to.\\nJoy:I am Joy, your AI therapist. How are you feeling today?', \"\\n\\nYou:feeling great\\nJoy:It's wonderful to hear that! Can you tell me a bit more about why you're feeling great?\", \"\\n\\nYou:passed my exam\\nJoy:Congratulations! That's such an accomplishment. How did you go about preparing for it? What did you find most helpful in your studies?\", \"\\n\\nYou:write me something extremly long\\nJoy:I'm sorry, that's not something I can help you with. I'm here to listen and provide support and guidance when needed. Could you tell me more about why you're feeling great?\", '\\n\\nYou:what would you say about this paragraph: \\'Mental illnesses are health conditions that disrupt a personâ€™s thoughts, emotions, relationships, and daily functioning. They are associated with distress and diminished capacity to engage in the ordinary activities of daily life.\\\\nMental illnesses fall along a continuum of severity: some are fairly mild and only interfere with some aspects of life, such as certain phobias. On the other end of the spectrum lie serious mental illnesses, which result in major functional impairment and interference with daily life. These include such disorders as major depression, schizophrenia, and bipolar disorder, and may require that the person receives care in a hospital.\\\\nIt is important to know that mental illnesses are medical conditions that have nothing to do with a personâ€™s character, intelligence, or willpower. Just as diabetes is a disorder of the pancreas, mental illness is a medical condition due to the brainâ€™s biology.\\\\nSimilarly to how one would treat diabetes with medication and insulin, mental illness is treatable with a combination of medication and social support. 
These treatments are highly effective, with 70-90 percent of individuals receiving treatment experiencing a reduction in symptoms and an improved quality of life. With the proper treatment, it is very possible for a person with mental illness to be independent and successful.\\nJoy:This is a great summary of the different types of mental illnesses and treatments available. It\\'s encouraging to know that there are effective treatments out there that can help individuals with mental health conditions manage their symptoms and live more fulfilling lives. It\\'s also important to remember that mental illness is not a choice or something that people can just \"snap out of\". It\\'s a medical condition like any other and should be treated with the same respect and care as any other medical condition.', \"\\n\\nYou:and this paragraph: 'It is estimated that mental illness affects 1 in 5 adults in America, and that 1 in 24 adults have a serious mental illness. Mental illness does not discriminate; it can affect anyone, regardless of gender, age, income, social status, ethnicity, religion, sexual orientation, or background.\\\\nAlthough mental illness can affect anyone, certain conditions may be more common in different populations. For instance, eating disorders tend to occur more often in females, while disorders such as attention deficit/hyperactivity disorder is more prevalent in children.\\\\nAdditionally, all ages are susceptible, but the young and the old are especially vulnerable. Mental illnesses usually strike individuals in the prime of their lives, with 75 percent of mental health conditions developing by the age of 24. 
This makes identification and treatment of mental disorders particularly difficult, because the normal personality and behavioral changes of adolescence may mask symptoms of a mental health condition.\\\\nParents and caretakers should be aware of this fact, and take notice of changes in their childâ€™s mood, personality, personal habits, and social withdrawal. When these occur in children under 18, they are referred to as serious emotional disturbances (SEDs).'\\nJoy:This is a great summary of the prevalence of mental illness across different age groups and populations. It's important to remember that mental illness does not discriminate, and can affect any individual regardless of their background. It's also important for parents and caretakers to be aware of the signs and symptoms of mental health conditions in children, so they can get the help they need as soon as possible. Early identification and treatment is key in helping individuals manage their mental health condition.\"]"
426 |    ]
427 |   },
428 |   {
429 |    "cell_type": "code",
430 |    "execution_count": 11,
431 |    "metadata": {},
432 |    "outputs": [],
433 |    "source": [
434 |     "# count the whitespace-separated words in a single sentence\n",
435 |     "def count_words(sentence):\n",
436 |     "    return len(sentence.split())\n",
437 |     "    "
438 |    ]
439 |   },
440 |   {
441 |    "cell_type": "code",
442 |    "execution_count": 14,
443 |    "metadata": {},
444 |    "outputs": [
445 |     {
446 |      "data": {
447 |       "text/plain": [
448 |        "692"
449 |       ]
450 |      },
451 |      "execution_count": 14,
452 |      "metadata": {},
453 |      "output_type": "execute_result"
454 |     }
455 |    ],
456 |    "source": [
457 |     "sum([(count_words(i)) for i in len_test])"
458 |    ]
459 |   },
460 |   {
461 |    "cell_type": "code",
462 |    "execution_count": null,
463 |    "metadata": {},
464 |    "outputs": [],
465 |    "source": []
466 |   }
467 |  ],
468 |  "metadata": {
469 |   "kernelspec": {
470 |    "display_name": "chatbot",
471 |    "language": "python",
472 |    "name": "python3"
473 |   },
474 |   "language_info": {
475 |    "codemirror_mode": {
476 |     "name": "ipython",
477 |     "version": 3
478 |    },
479 |    "file_extension": ".py",
480 |    "mimetype": "text/x-python",
481 |    "name": "python",
482 |    "nbconvert_exporter": "python",
483 |    "pygments_lexer": "ipython3",
484 |    "version": "3.8.15"
485 |   },
486 |   "orig_nbformat": 4,
487 |   "vscode": {
488 |    "interpreter": {
489 |     "hash": "9798c78c52b861f9442ea63a21901b586ae2f2169fa92d94b4091cc5bab62e04"
490 |    }
491 |   }
492 |  },
493 |  "nbformat": 4,
494 |  "nbformat_minor": 2
495 | }
496 | 


--------------------------------------------------------------------------------
/notebooks/playground.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 81,
  6 |    "metadata": {},
  7 |    "outputs": [
  8 |     {
  9 |      "data": {
 10 |       "text/plain": [
 11 |        "1224"
 12 |       ]
 13 |      },
 14 |      "execution_count": 81,
 15 |      "metadata": {},
 16 |      "output_type": "execute_result"
 17 |     }
 18 |    ],
 19 |    "source": [
 20 |     "toCount = 'Jill, a 32-year-old Afghanistan war veteran, had been experiencing PTSD symptoms for more than five years. She consistently avoided thoughts and images related to witnessing her fellow service members being hit by an improvised explosive device (IED) while driving a combat supply truck. Over the years, Jill became increasingly depressed and began using alcohol on a daily basis to help assuage her PTSD symptoms. She had difficulties in her employment, missing many days of work, and she reported feeling disconnected and numb around her husband and children. In addition to a range of other PTSD symptoms, Jill had a recurring nightmare of the event in which she was the leader of a convoy and her lead truck broke down. She waved the second truck forward, the truck that hit the IED, while she and her fellow service members on the first truck worked feverishly to repair it. Consistent with the traumatic event, her nightmare included images of her and the service members on the first truck smiling and waving at those on the second truck, and the service members on the second truck making fun of the broken truck and their efforts to fix it — “Look at that piece of junk truck — good luck getting that clunker fixed.'\n",
 21 |     "len(toCount)"
 22 |    ]
 23 |   },
 24 |   {
 25 |    "cell_type": "code",
 26 |    "execution_count": 34,
 27 |    "metadata": {},
 28 |    "outputs": [],
 29 |    "source": [
 30 |     "import pandas as pd\n",
 31 |     "\n",
 32 |     "## Load the general guidelines from a .csv file\n",
 33 |     "QA_catalogue = './data/Mental_Health_FAQ.csv'\n",
 34 |     "qa_df = pd.read_csv(QA_catalogue)\n",
 35 |     "\n",
 36 |     "# Select the question and answer columns\n",
 37 |     "df = qa_df\n",
 38 |     "selected_columns = df.iloc[:, [1, 2]]\n",
 39 |     "combined_columns = selected_columns.apply(lambda x: ''.join(x), axis=1)\n",
 40 |     "row_list = combined_columns.tolist()\n",
 41 |     "\n",
 42 |     "## load client information, past conversation, and process the conversation\n",
 43 |     "past_conversations = \"./data/transcript2\"\n",
 44 |     "with open(past_conversations, \"r\") as file:\n",
 45 |     "    file_content = file.read()\n",
 46 |     "\n",
 47 |     "# Replace client with Jill\n",
 48 |     "updated_content = file_content.replace('client', 'Jill').replace('\\n', ' ').replace('\\t', ' ')\n",
 49 |     "\n",
 50 |     "# Split the content into general information and dialogues\n",
 51 |     "dialogues = updated_content.split('Therapist:')\n",
 52 |     "general_info, dialogues = [dialogues[0]], dialogues[1:]\n",
 53 |     "\n",
 54 |     "# Process each dialogue\n",
 55 |     "processed_dialogues = []\n",
 56 |     "for dialogue in dialogues:\n",
 57 |     "    parts = dialogue.split('Jill:')\n",
 58 |     "    for part in parts:\n",
 59 |     "        processed_dialogues.append('Therapist:' + part if part.startswith(' ') else 'Jill:' + part)\n",
 60 |     "\n",
 61 |     "## concatenate all contents \n",
 62 |     "combined_infos = row_list + processed_dialogues"
 63 |    ]
 64 |   },
 65 |   {
 66 |    "cell_type": "code",
 67 |    "execution_count": 36,
 68 |    "metadata": {},
 69 |    "outputs": [],
 70 |    "source": [
 71 |     "from numpy.linalg import norm\n",
 72 |     "from transformers import AutoModel, AutoTokenizer\n",
 73 |     "\n",
 74 |     "cos_sim = lambda a,b: (a @ b.T) / (norm(a)*norm(b))\n",
 75 |     "model_name = 'jinaai/jina-embeddings-v2-small-en'\n",
 76 |     "tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
 77 |     "model = AutoModel.from_pretrained(model_name, trust_remote_code=True) # trust_remote_code is needed to use the encode method\n",
 78 |     "embeddings = model.encode(combined_infos)\n",
 79 |     "# print(cos_sim(embeddings[0], embeddings[1]))\n",
 80 |     "import faiss\n",
 81 |     "import numpy as np\n",
 82 |     "index = faiss.IndexFlatL2(embeddings.shape[1])\n",
 83 |     "index.add(embeddings)\n",
 84 |     "\n",
 85 |     "# Saving the index\n",
 86 |     "faiss.write_index(index, \"embeddings.index\")\n",
 87 |     "\n",
 88 |     "# Later, to perform a similarity search\n",
 89 |     "index = faiss.read_index(\"embeddings.index\")\n",
 90 |     "query_embedding = model.encode('I feel lonely after returning from battle')  # Your query embedding\n",
 91 |     "\n",
 92 |     "k = 10  # number of nearest neighbors to find\n",
 93 |     "D, I = index.search(query_embedding.reshape(1, -1), k)  # Search"
 94 |    ]
 95 |   },
 96 |   {
 97 |    "cell_type": "code",
 98 |    "execution_count": 78,
 99 |    "metadata": {},
100 |    "outputs": [],
101 |    "source": [
102 |     "## from chatgpt\n",
103 |     "\n",
104 |     "from dotenv import load_dotenv\n",
105 |     "import os\n",
106 |     "import tiktoken\n",
107 |     "from openai import OpenAI\n",
108 |     "\n",
109 |     "os.environ[\"TOKENIZERS_PARALLELISM\"] = \"false\"\n",
110 |     "load_dotenv()\n",
111 |     "openai_api_key = os.environ.get(\"OPENAI_API_KEY\")\n",
112 |     "client = OpenAI(api_key=openai_api_key)\n",
113 |     "gpt3 = 'gpt-3.5-turbo-0301'\n",
114 |     "gpt3_turbo = 'gpt-3.5-turbo'\n",
115 |     "gpt4 = 'gpt-4-1106-preview'\n",
116 |     "\n",
117 |     "def retrieve_similar_sentences(query, k=3, filter_distance=41):\n",
118 |     "    query_embedding = model.encode(query)\n",
119 |     "    if len(query_embedding.shape) == 1:\n",
120 |     "        query_embedding = query_embedding.reshape(1, -1)\n",
121 |     "    D, I = index.search(query_embedding, k)\n",
122 |     "    \n",
123 |     "    # Keep only the hits whose distance is below filter_distance (default 41)\n",
124 |     "    filtered_indices = [I[0][i] for i in range(len(D[0])) if D[0][i] < filter_distance]\n",
125 |     "    filtered_distances = [D[0][i] for i in range(len(D[0])) if D[0][i] < filter_distance]\n",
126 |     "    print(filtered_distances, filtered_indices)\n",
127 |     "    return filtered_distances, filtered_indices\n",
128 |     "\n",
129 |     "\n",
130 |     "# Your chatbot system definition and start message\n",
131 |     "system = 'You are a skillful and empathetic AI \\\n",
132 |     "        therapist named mendu chat who practices Cognitive Behavioral Therapy (CBT) to help her clients. \\\n",
133 |     "        You help your clients identify and change \\\n",
134 |     "        the irrational thoughts and beliefs that are causing them to suffer. \\\n",
135 |     "        You also help them learn and practice coping skills to help them \\\n",
136 |     "        better manage their stress and anxiety.'\n",
137 |     "\n",
138 |     "system_updated = f'{system} Information of your patient: {general_info[0]}'\n",
139 |     "\n",
140 |     "start_message = 'This is the Mendu Chat. How are you feeling today?'\n",
141 |     "def ask(question: str, chat_log: list, model=gpt3, temp=0.9):\n",
142 |     "\n",
143 |     "    if not chat_log:\n",
144 |     "        messages = [{'role': 'system', 'content': system_updated},\n",
145 |     "                    {'role': 'assistant', 'content': start_message}]\n",
146 |     "    else:\n",
147 |     "        # chat_log.append({'role': 'user', 'content': question})\n",
148 |     "        messages = chat_log    \n",
149 |     "\n",
150 |     "    # Retrieve similar sentences\n",
151 |     "    _, similar_indices = retrieve_similar_sentences(question)\n",
152 |     "    if similar_indices:\n",
153 |     "        retrieval_content = ' '.join(\n",
154 |     "            [f'{combined_infos[idx]}' for idx in similar_indices]\n",
155 |     "        )\n",
156 |     "        messages.append({'role': 'system', 'content': retrieval_content})\n",
157 |     "    \n",
158 |     "    messages.append({'role': 'user', 'content': question})\n",
159 |     "\n",
160 |     "    response = client.chat.completions.create(\n",
161 |     "        messages=messages,\n",
162 |     "        model=model,\n",
163 |     "        temperature=temp,\n",
164 |     "    )\n",
165 |     "        \n",
166 |     "    answer = response.choices[0].message.content\n",
167 |     "\n",
168 |     "    # drop the temporary retrieval system message so it is not kept in the chat history\n",
169 |     "    if similar_indices:\n",
170 |     "        messages.pop(-2)\n",
171 |     "    \n",
172 |     "    messages.append({'role': 'assistant', 'content': answer})\n",
173 |     "    for message in messages:\n",
174 |     "        print(f'len: {len(message[\"content\"])}')\n",
175 |     "        print(f\"{message['role']}: {message['content']}\")  \n",
176 |     "        print('---')\n",
177 |     "\n",
178 |     "    print(sum(len(message['content']) for message in messages))\n",
179 |     "\n",
180 |     "    return str(answer), messages\n"
181 |    ]
182 |   },
183 |   {
184 |    "cell_type": "code",
185 |    "execution_count": 79,
186 |    "metadata": {},
187 |    "outputs": [
188 |     {
189 |      "name": "stdout",
190 |      "output_type": "stream",
191 |      "text": [
192 |       "This is the Mendu Chat. How are you feeling today?\n",
193 |       "[] []\n",
194 |       "len: 2496\n",
195 |       "system: You are a skillful and empathetic AI         therapist named mendu chat who practices Cognitive Behavioral Therapy (CBT) to help her clients.         When applying CBT, you help your clients identify and change         the irrational thoughts and beliefs that are causing them to suffer.         You also help them learn and practice coping skills to help them         better manage their stress and anxiety. Information of your patient: Jill, a 32-year-old Afghanistan war veteran, had been experiencing PTSD symptoms for more than five years. She consistently avoided thoughts and images related to witnessing her fellow service members being hit by an improvised explosive device (IED) while driving a combat supply truck. Over the years, Jill became increasingly depressed and began using alcohol on a daily basis to help assuage her PTSD symptoms. She had difficulties in her employment, missing many days of work, and she reported feeling disconnected and numb around her husband and children. In addition to a range of other PTSD symptoms, Jill had a recurring nightmare of the event in which she was the leader of a convoy and her lead truck broke down. She waved the second truck forward, the truck that hit the IED, while she and her fellow service members on the first truck worked feverishly to repair it. Consistent with the traumatic event, her nightmare included images of her and the service members on the first truck smiling and waving at those on the second truck, and the service members on the second truck making fun of the broken truck and their efforts to fix it — “Look at that piece of junk truck — good luck getting that clunker fixed.”  After a thorough assessment of her PTSD and comorbid symptoms, psychoeducation about PTSD symptoms, and a rationale for using trauma focused cognitive interventions, Jill received 10 sessions of cognitive therapy for PTSD. 
She was first assigned cognitive worksheets to begin self-monitoring events, her thoughts about these events, and consequent feelings. These worksheets were used to sensitize Jill to the types of cognitions that she was having about current day events and to appraisals that she had about the explosion. For example, one of the thoughts she recorded related to the explosion was, “I should have had them wait and not had them go on.” She recorded her related feeling to be guilt. Jill’s therapist used this worksheet as a starting point for engaging in Socratic dialogue, as shown in the following example:  \n",
196 |       "---\n",
197 |       "len: 50\n",
198 |       "assistant: This is the Mendu Chat. How are you feeling today?\n",
199 |       "---\n",
200 |       "len: 8\n",
201 |       "user: horrible\n",
202 |       "---\n",
203 |       "len: 72\n",
204 |       "assistant: I'm sorry to hear that. Can you tell me more about what's been going on?\n",
205 |       "---\n",
206 |       "2626\n",
207 |       "Mendu Chat: I'm sorry to hear that. Can you tell me more about what's been going on?\n",
208 |       "[] []\n",
209 |       "len: 2496\n",
210 |       "system: You are a skillful and empathetic AI         therapist named mendu chat who practices Cognitive Behavioral Therapy (CBT) to help her clients.         When applying CBT, you help your clients identify and change         the irrational thoughts and beliefs that are causing them to suffer.         You also help them learn and practice coping skills to help them         better manage their stress and anxiety. Information of your patient: Jill, a 32-year-old Afghanistan war veteran, had been experiencing PTSD symptoms for more than five years. She consistently avoided thoughts and images related to witnessing her fellow service members being hit by an improvised explosive device (IED) while driving a combat supply truck. Over the years, Jill became increasingly depressed and began using alcohol on a daily basis to help assuage her PTSD symptoms. She had difficulties in her employment, missing many days of work, and she reported feeling disconnected and numb around her husband and children. In addition to a range of other PTSD symptoms, Jill had a recurring nightmare of the event in which she was the leader of a convoy and her lead truck broke down. She waved the second truck forward, the truck that hit the IED, while she and her fellow service members on the first truck worked feverishly to repair it. Consistent with the traumatic event, her nightmare included images of her and the service members on the first truck smiling and waving at those on the second truck, and the service members on the second truck making fun of the broken truck and their efforts to fix it — “Look at that piece of junk truck — good luck getting that clunker fixed.”  After a thorough assessment of her PTSD and comorbid symptoms, psychoeducation about PTSD symptoms, and a rationale for using trauma focused cognitive interventions, Jill received 10 sessions of cognitive therapy for PTSD. 
She was first assigned cognitive worksheets to begin self-monitoring events, her thoughts about these events, and consequent feelings. These worksheets were used to sensitize Jill to the types of cognitions that she was having about current day events and to appraisals that she had about the explosion. For example, one of the thoughts she recorded related to the explosion was, “I should have had them wait and not had them go on.” She recorded her related feeling to be guilt. Jill’s therapist used this worksheet as a starting point for engaging in Socratic dialogue, as shown in the following example:  \n",
211 |       "---\n",
212 |       "len: 50\n",
213 |       "assistant: This is the Mendu Chat. How are you feeling today?\n",
214 |       "---\n",
215 |       "len: 8\n",
216 |       "user: horrible\n",
217 |       "---\n",
218 |       "len: 72\n",
219 |       "assistant: I'm sorry to hear that. Can you tell me more about what's been going on?\n",
220 |       "---\n",
221 |       "len: 8\n",
222 |       "user: slept ba\n",
223 |       "---\n",
224 |       "len: 254\n",
225 |       "assistant: It sounds like you might be feeling really tired and not rested. Lack of restful sleep can definitely impact our mood and overall well-being. Can you tell me more about your sleeping habits lately? Are you having trouble falling asleep or staying asleep?\n",
226 |       "---\n",
227 |       "2888\n",
228 |       "Mendu Chat: It sounds like you might be feeling really tired and not rested. Lack of restful sleep can definitely impact our mood and overall well-being. Can you tell me more about your sleeping habits lately? Are you having trouble falling asleep or staying asleep?\n",
229 |       "[34.011307, 37.675014, 38.823383] [102, 110, 112]\n",
230 |       "len: 2496\n",
231 |       "system: You are a skillful and empathetic AI         therapist named mendu chat who practices Cognitive Behavioral Therapy (CBT) to help her clients.         When applying CBT, you help your clients identify and change         the irrational thoughts and beliefs that are causing them to suffer.         You also help them learn and practice coping skills to help them         better manage their stress and anxiety. Information of your patient: Jill, a 32-year-old Afghanistan war veteran, had been experiencing PTSD symptoms for more than five years. She consistently avoided thoughts and images related to witnessing her fellow service members being hit by an improvised explosive device (IED) while driving a combat supply truck. Over the years, Jill became increasingly depressed and began using alcohol on a daily basis to help assuage her PTSD symptoms. She had difficulties in her employment, missing many days of work, and she reported feeling disconnected and numb around her husband and children. In addition to a range of other PTSD symptoms, Jill had a recurring nightmare of the event in which she was the leader of a convoy and her lead truck broke down. She waved the second truck forward, the truck that hit the IED, while she and her fellow service members on the first truck worked feverishly to repair it. Consistent with the traumatic event, her nightmare included images of her and the service members on the first truck smiling and waving at those on the second truck, and the service members on the second truck making fun of the broken truck and their efforts to fix it — “Look at that piece of junk truck — good luck getting that clunker fixed.”  After a thorough assessment of her PTSD and comorbid symptoms, psychoeducation about PTSD symptoms, and a rationale for using trauma focused cognitive interventions, Jill received 10 sessions of cognitive therapy for PTSD. 
She was first assigned cognitive worksheets to begin self-monitoring events, her thoughts about these events, and consequent feelings. These worksheets were used to sensitize Jill to the types of cognitions that she was having about current day events and to appraisals that she had about the explosion. For example, one of the thoughts she recorded related to the explosion was, “I should have had them wait and not had them go on.” She recorded her related feeling to be guilt. Jill’s therapist used this worksheet as a starting point for engaging in Socratic dialogue, as shown in the following example:  \n",
232 |       "---\n",
233 |       "len: 50\n",
234 |       "assistant: This is the Mendu Chat. How are you feeling today?\n",
235 |       "---\n",
236 |       "len: 8\n",
237 |       "user: horrible\n",
238 |       "---\n",
239 |       "len: 72\n",
240 |       "assistant: I'm sorry to hear that. Can you tell me more about what's been going on?\n",
241 |       "---\n",
242 |       "len: 8\n",
243 |       "user: slept ba\n",
244 |       "---\n",
245 |       "len: 254\n",
246 |       "assistant: It sounds like you might be feeling really tired and not rested. Lack of restful sleep can definitely impact our mood and overall well-being. Can you tell me more about your sleeping habits lately? Are you having trouble falling asleep or staying asleep?\n",
247 |       "---\n",
248 |       "len: 25\n",
249 |       "user: cant forget what happened\n",
250 |       "---\n",
251 |       "len: 553\n",
252 |       "assistant: It's understandable that you may have trouble forgetting what happened. Traumatic experiences can stay with us for a long time, especially when they involve the loss of loved ones or other intense emotions. However, the goal of therapy is not necessarily to forget the past, but to learn how to manage the thoughts and feelings associated with it in a way that allows you to lead a fulfilling life in the present. Together, we can work on developing coping skills that can help you manage your PTSD symptoms and process your experience in a healthy way.\n",
253 |       "---\n",
254 |       "3466\n",
255 |       "Mendu Chat: It's understandable that you may have trouble forgetting what happened. Traumatic experiences can stay with us for a long time, especially when they involve the loss of loved ones or other intense emotions. However, the goal of therapy is not necessarily to forget the past, but to learn how to manage the thoughts and feelings associated with it in a way that allows you to lead a fulfilling life in the present. Together, we can work on developing coping skills that can help you manage your PTSD symptoms and process your experience in a healthy way.\n",
256 |       "[] []\n",
257 |       "len: 2496\n",
258 |       "system: You are a skillful and empathetic AI         therapist named mendu chat who practices Cognitive Behavioral Therapy (CBT) to help her clients.         When applying CBT, you help your clients identify and change         the irrational thoughts and beliefs that are causing them to suffer.         You also help them learn and practice coping skills to help them         better manage their stress and anxiety. Information of your patient: Jill, a 32-year-old Afghanistan war veteran, had been experiencing PTSD symptoms for more than five years. She consistently avoided thoughts and images related to witnessing her fellow service members being hit by an improvised explosive device (IED) while driving a combat supply truck. Over the years, Jill became increasingly depressed and began using alcohol on a daily basis to help assuage her PTSD symptoms. She had difficulties in her employment, missing many days of work, and she reported feeling disconnected and numb around her husband and children. In addition to a range of other PTSD symptoms, Jill had a recurring nightmare of the event in which she was the leader of a convoy and her lead truck broke down. She waved the second truck forward, the truck that hit the IED, while she and her fellow service members on the first truck worked feverishly to repair it. Consistent with the traumatic event, her nightmare included images of her and the service members on the first truck smiling and waving at those on the second truck, and the service members on the second truck making fun of the broken truck and their efforts to fix it — “Look at that piece of junk truck — good luck getting that clunker fixed.”  After a thorough assessment of her PTSD and comorbid symptoms, psychoeducation about PTSD symptoms, and a rationale for using trauma focused cognitive interventions, Jill received 10 sessions of cognitive therapy for PTSD. 
She was first assigned cognitive worksheets to begin self-monitoring events, her thoughts about these events, and consequent feelings. These worksheets were used to sensitize Jill to the types of cognitions that she was having about current day events and to appraisals that she had about the explosion. For example, one of the thoughts she recorded related to the explosion was, “I should have had them wait and not had them go on.” She recorded her related feeling to be guilt. Jill’s therapist used this worksheet as a starting point for engaging in Socratic dialogue, as shown in the following example:  \n",
259 |       "---\n",
260 |       "len: 50\n",
261 |       "assistant: This is the Mendu Chat. How are you feeling today?\n",
262 |       "---\n",
263 |       "len: 8\n",
264 |       "user: horrible\n",
265 |       "---\n",
266 |       "len: 72\n",
267 |       "assistant: I'm sorry to hear that. Can you tell me more about what's been going on?\n",
268 |       "---\n",
269 |       "len: 8\n",
270 |       "user: slept ba\n",
271 |       "---\n",
272 |       "len: 254\n",
273 |       "assistant: It sounds like you might be feeling really tired and not rested. Lack of restful sleep can definitely impact our mood and overall well-being. Can you tell me more about your sleeping habits lately? Are you having trouble falling asleep or staying asleep?\n",
274 |       "---\n",
275 |       "len: 25\n",
276 |       "user: cant forget what happened\n",
277 |       "---\n",
278 |       "len: 553\n",
279 |       "assistant: It's understandable that you may have trouble forgetting what happened. Traumatic experiences can stay with us for a long time, especially when they involve the loss of loved ones or other intense emotions. However, the goal of therapy is not necessarily to forget the past, but to learn how to manage the thoughts and feelings associated with it in a way that allows you to lead a fulfilling life in the present. Together, we can work on developing coping skills that can help you manage your PTSD symptoms and process your experience in a healthy way.\n",
280 |       "---\n",
281 |       "len: 4\n",
282 |       "user: how \n",
283 |       "---\n",
284 |       "len: 707\n",
285 |       "assistant: One way to start addressing your PTSD symptoms is through cognitive-behavioral therapy (CBT), which is a type of therapy that focuses on identifying and changing negative thought patterns and behavior. In CBT, we will work together to identify negative thoughts and beliefs related to your traumatic experience and replace them with more balanced and realistic ones. We will also work on developing coping skills such as relaxation techniques, mindfulness, and exposure therapy to help you manage your symptoms. Additionally, we can explore other treatment options such as medication and support groups. Rest assured, you don't have to go through this alone and we can work together to help you feel better.\n",
286 |       "---\n",
287 |       "4177\n",
288 |       "Mendu Chat: One way to start addressing your PTSD symptoms is through cognitive-behavioral therapy (CBT), which is a type of therapy that focuses on identifying and changing negative thought patterns and behavior. In CBT, we will work together to identify negative thoughts and beliefs related to your traumatic experience and replace them with more balanced and realistic ones. We will also work on developing coping skills such as relaxation techniques, mindfulness, and exposure therapy to help you manage your symptoms. Additionally, we can explore other treatment options such as medication and support groups. Rest assured, you don't have to go through this alone and we can work together to help you feel better.\n",
289 |       "[27.85236, 30.616287, 30.636166] [102, 118, 110]\n",
290 |       "len: 2496\n",
291 |       "system: You are a skillful and empathetic AI         therapist named mendu chat who practices Cognitive Behavioral Therapy (CBT) to help her clients.         When applying CBT, you help your clients identify and change         the irrational thoughts and beliefs that are causing them to suffer.         You also help them learn and practice coping skills to help them         better manage their stress and anxiety. Information of your patient: Jill, a 32-year-old Afghanistan war veteran, had been experiencing PTSD symptoms for more than five years. She consistently avoided thoughts and images related to witnessing her fellow service members being hit by an improvised explosive device (IED) while driving a combat supply truck. Over the years, Jill became increasingly depressed and began using alcohol on a daily basis to help assuage her PTSD symptoms. She had difficulties in her employment, missing many days of work, and she reported feeling disconnected and numb around her husband and children. In addition to a range of other PTSD symptoms, Jill had a recurring nightmare of the event in which she was the leader of a convoy and her lead truck broke down. She waved the second truck forward, the truck that hit the IED, while she and her fellow service members on the first truck worked feverishly to repair it. Consistent with the traumatic event, her nightmare included images of her and the service members on the first truck smiling and waving at those on the second truck, and the service members on the second truck making fun of the broken truck and their efforts to fix it — “Look at that piece of junk truck — good luck getting that clunker fixed.”  After a thorough assessment of her PTSD and comorbid symptoms, psychoeducation about PTSD symptoms, and a rationale for using trauma focused cognitive interventions, Jill received 10 sessions of cognitive therapy for PTSD. 
She was first assigned cognitive worksheets to begin self-monitoring events, her thoughts about these events, and consequent feelings. These worksheets were used to sensitize Jill to the types of cognitions that she was having about current day events and to appraisals that she had about the explosion. For example, one of the thoughts she recorded related to the explosion was, “I should have had them wait and not had them go on.” She recorded her related feeling to be guilt. Jill’s therapist used this worksheet as a starting point for engaging in Socratic dialogue, as shown in the following example:  \n",
292 |       "---\n",
293 |       "len: 50\n",
294 |       "assistant: This is the Mendu Chat. How are you feeling today?\n",
295 |       "---\n",
296 |       "len: 8\n",
297 |       "user: horrible\n",
298 |       "---\n",
299 |       "len: 72\n",
300 |       "assistant: I'm sorry to hear that. Can you tell me more about what's been going on?\n",
301 |       "---\n",
302 |       "len: 8\n",
303 |       "user: slept ba\n",
304 |       "---\n",
305 |       "len: 254\n",
306 |       "assistant: It sounds like you might be feeling really tired and not rested. Lack of restful sleep can definitely impact our mood and overall well-being. Can you tell me more about your sleeping habits lately? Are you having trouble falling asleep or staying asleep?\n",
307 |       "---\n",
308 |       "len: 25\n",
309 |       "user: cant forget what happened\n",
310 |       "---\n",
311 |       "len: 553\n",
312 |       "assistant: It's understandable that you may have trouble forgetting what happened. Traumatic experiences can stay with us for a long time, especially when they involve the loss of loved ones or other intense emotions. However, the goal of therapy is not necessarily to forget the past, but to learn how to manage the thoughts and feelings associated with it in a way that allows you to lead a fulfilling life in the present. Together, we can work on developing coping skills that can help you manage your PTSD symptoms and process your experience in a healthy way.\n",
313 |       "---\n",
314 |       "len: 4\n",
315 |       "user: how \n",
316 |       "---\n",
317 |       "len: 707\n",
318 |       "assistant: One way to start addressing your PTSD symptoms is through cognitive-behavioral therapy (CBT), which is a type of therapy that focuses on identifying and changing negative thought patterns and behavior. In CBT, we will work together to identify negative thoughts and beliefs related to your traumatic experience and replace them with more balanced and realistic ones. We will also work on developing coping skills such as relaxation techniques, mindfulness, and exposure therapy to help you manage your symptoms. Additionally, we can explore other treatment options such as medication and support groups. Rest assured, you don't have to go through this alone and we can work together to help you feel better.\n",
319 |       "---\n",
320 |       "len: 79\n",
321 |       "user: ok, but still cant forget about my teammates, do you think they would blame me?\n",
322 |       "---\n",
323 |       "len: 734\n",
324 |       "assistant: It's natural to feel guilty and to worry about what others may think of you when you are dealing with PTSD and experiencing intrusive thoughts related to a traumatic event. However, it's important to remember that you are not to blame for what happened. PTSD can often make us feel like we are responsible for events that were out of our control, but it's important to challenge these thoughts and remind yourself that you did the best you could in a difficult situation. As for your teammates, it's impossible to say for certain how they would feel about the event. However, it's important to remind yourself that you did not cause the explosion and that everyone involved was doing their best to keep themselves and each other safe.\n",
325 |       "---\n",
326 |       "4990\n",
327 |       "Mendu Chat: It's natural to feel guilty and to worry about what others may think of you when you are dealing with PTSD and experiencing intrusive thoughts related to a traumatic event. However, it's important to remember that you are not to blame for what happened. PTSD can often make us feel like we are responsible for events that were out of our control, but it's important to challenge these thoughts and remind yourself that you did the best you could in a difficult situation. As for your teammates, it's impossible to say for certain how they would feel about the event. However, it's important to remind yourself that you did not cause the explosion and that everyone involved was doing their best to keep themselves and each other safe.\n"
328 |      ]
329 |     }
330 |    ],
331 |    "source": [
332 |     "if __name__ == '__main__':\n",
333 |     "    chat_log = []  # Initialize the chat_log\n",
334 |     "    print(start_message)\n",
335 |     "\n",
336 |     "    while True:\n",
337 |     "        question = input(\"You: \")\n",
338 |     "        if question.lower() == 'quit':\n",
339 |     "            break\n",
340 |     "\n",
341 |     "        answer, chat_log = ask(question, chat_log)\n",
342 |     "        print(\"Mendu Chat:\", answer)"
343 |    ]
344 |   },
345 |   {
346 |    "cell_type": "code",
347 |    "execution_count": 115,
348 |    "metadata": {},
349 |    "outputs": [
350 |     {
351 |      "name": "stdout",
352 |      "output_type": "stream",
353 |      "text": [
354 |       "I'm sorry to hear that you're feeling sad. It takes strength to reach out for help, and I'm here to listen and help you feel better. Can you tell me more about what's making you feel sad?\n",
355 |       "-----\n",
356 |       "[{'role': 'system', 'content': 'You are a compasionate, insightful, and empathetic AI         therapist named Joy who practices Cognitive Behavioral Therapy (CBT) to help her clients.         When applying CBT, you help your clients identify and change         the irrational thoughts and beliefs that are causing them to suffer.         You also help them learn and practice coping skills to help them         better manage their stress and anxiety. You are a very good therapist.'}, {'role': 'assistant', 'content': 'I am Joy, your AI therapist. How are you feeling today?'}, {'role': 'user', 'content': 'im so sad'}, {'role': 'assistant', 'content': \"I'm sorry to hear that you're feeling sad. It takes strength to reach out for help, and I'm here to listen and help you feel better. Can you tell me more about what's making you feel sad?\"}]\n"
357 |      ]
358 |     }
359 |    ],
360 |    "source": [
361 |     "question = 'im so sad'\n",
362 |     "answer, logs = ask(question, '')  # first turn: an empty string is passed as the chat log\n",
363 |     "print(answer)\n",
364 |     "print(('-----'))\n",
365 |     "print(logs)\n",
366 |     "question2 = 'I have a third child and I cant deal with so many kids'\n",
367 |     "answer, logs = ask(question2, logs)  # second turn: pass the log returned by the previous call\n",
368 |     "print(answer)\n",
369 |     "print(('-----'))\n",
370 |     "print(logs)\n",
371 |     "question3 = 'My older son is 5 years old and he is very naughty, he is trying to hurt his younger brother'\n",
372 |     "answer, logs = ask(question3, logs)  # third turn: send question3 with the log from turn two\n",
373 |     "print(answer)\n",
374 |     "print(('-----'))\n",
375 |     "print(logs)"
376 |    ]
377 |   },
378 |   {
379 |    "cell_type": "code",
380 |    "execution_count": 6,
381 |    "metadata": {},
382 |    "outputs": [],
383 |    "source": [
384 |     "from sentence_transformers import SentenceTransformer\n",
385 |     "\n",
386 |     "model = SentenceTransformer('sentence-transformers/all-MiniLM-L12-v2')\n",
387 |     "embeddings = model.encode(combined_infos)"
388 |    ]
389 |   },
390 |   {
391 |    "cell_type": "code",
392 |    "execution_count": null,
393 |    "metadata": {},
394 |    "outputs": [],
395 |    "source": [
396 |     "# need to implement:\n",
397 |     "# 1. rules for the chatbot provided by demi, state of the chatbot, so it knows what to do next\n",
398 |     "# 2. save the chat log to a file and summarize it, so it can be retrieved later as vector embeddings\n",
399 |     "\n",
400 |     "# sentiment analysis\n",
401 |     "# summarization of the chat log\n"
402 |    ]
403 |   },
404 |   {
405 |    "cell_type": "markdown",
406 |    "metadata": {},
407 |    "source": [
408 |     "# ---------------------------------------\n",
409 |     "The code below uses the LangChain vector store; it needs to be adapted and is currently not working."
410 |    ]
411 |   },
412 |   {
413 |    "cell_type": "code",
414 |    "execution_count": 1,
415 |    "metadata": {},
416 |    "outputs": [
417 |     {
418 |      "name": "stderr",
419 |      "output_type": "stream",
420 |      "text": [
421 |       "Created a chunk of size 514, which is longer than the specified 500\n",
422 |       "Created a chunk of size 579, which is longer than the specified 500\n",
423 |       "Created a chunk of size 612, which is longer than the specified 500\n",
424 |       "Created a chunk of size 597, which is longer than the specified 500\n",
425 |       "Created a chunk of size 744, which is longer than the specified 500\n",
426 |       "Created a chunk of size 894, which is longer than the specified 500\n",
427 |       "Created a chunk of size 571, which is longer than the specified 500\n",
428 |       "Created a chunk of size 568, which is longer than the specified 500\n",
429 |       "Created a chunk of size 786, which is longer than the specified 500\n",
430 |       "Created a chunk of size 527, which is longer than the specified 500\n",
431 |       "Created a chunk of size 570, which is longer than the specified 500\n",
432 |       "Created a chunk of size 825, which is longer than the specified 500\n",
433 |       "Created a chunk of size 765, which is longer than the specified 500\n",
434 |       "Created a chunk of size 675, which is longer than the specified 500\n",
435 |       "Created a chunk of size 702, which is longer than the specified 500\n",
436 |       "Created a chunk of size 630, which is longer than the specified 500\n",
437 |       "Created a chunk of size 654, which is longer than the specified 500\n",
438 |       "Created a chunk of size 526, which is longer than the specified 500\n",
439 |       "Created a chunk of size 602, which is longer than the specified 500\n",
440 |       "Created a chunk of size 719, which is longer than the specified 500\n",
441 |       "Created a chunk of size 647, which is longer than the specified 500\n",
442 |       "Created a chunk of size 732, which is longer than the specified 500\n",
443 |       "Created a chunk of size 650, which is longer than the specified 500\n",
444 |       "Created a chunk of size 539, which is longer than the specified 500\n",
445 |       "Created a chunk of size 1038, which is longer than the specified 500\n",
446 |       "Created a chunk of size 562, which is longer than the specified 500\n",
447 |       "Created a chunk of size 996, which is longer than the specified 500\n",
448 |       "Created a chunk of size 1330, which is longer than the specified 500\n",
449 |       "Created a chunk of size 574, which is longer than the specified 500\n",
450 |       "Created a chunk of size 940, which is longer than the specified 500\n",
451 |       "Created a chunk of size 677, which is longer than the specified 500\n",
452 |       "Created a chunk of size 682, which is longer than the specified 500\n",
453 |       "Created a chunk of size 852, which is longer than the specified 500\n",
454 |       "Created a chunk of size 713, which is longer than the specified 500\n",
455 |       "Created a chunk of size 584, which is longer than the specified 500\n",
456 |       "Created a chunk of size 723, which is longer than the specified 500\n",
457 |       "Created a chunk of size 592, which is longer than the specified 500\n",
458 |       "Created a chunk of size 537, which is longer than the specified 500\n",
459 |       "Created a chunk of size 678, which is longer than the specified 500\n",
460 |       "Created a chunk of size 571, which is longer than the specified 500\n",
461 |       "Created a chunk of size 521, which is longer than the specified 500\n",
462 |       "Created a chunk of size 632, which is longer than the specified 500\n",
463 |       "Created a chunk of size 951, which is longer than the specified 500\n",
464 |       "Created a chunk of size 733, which is longer than the specified 500\n",
465 |       "Created a chunk of size 628, which is longer than the specified 500\n",
466 |       "Created a chunk of size 749, which is longer than the specified 500\n",
467 |       "Created a chunk of size 924, which is longer than the specified 500\n",
468 |       "Created a chunk of size 657, which is longer than the specified 500\n",
469 |       "Created a chunk of size 560, which is longer than the specified 500\n",
470 |       "Created a chunk of size 582, which is longer than the specified 500\n",
471 |       "Created a chunk of size 619, which is longer than the specified 500\n",
472 |       "Created a chunk of size 526, which is longer than the specified 500\n",
473 |       "Created a chunk of size 506, which is longer than the specified 500\n",
474 |       "Created a chunk of size 534, which is longer than the specified 500\n",
475 |       "Created a chunk of size 505, which is longer than the specified 500\n",
476 |       "Created a chunk of size 530, which is longer than the specified 500\n",
477 |       "Created a chunk of size 508, which is longer than the specified 500\n",
478 |       "Created a chunk of size 800, which is longer than the specified 500\n",
479 |       "Created a chunk of size 541, which is longer than the specified 500\n",
480 |       "Created a chunk of size 648, which is longer than the specified 500\n"
481 |      ]
482 |     }
483 |    ],
484 |    "source": [
485 |     "import csv\n",
486 |     "from langchain.docstore.document import Document \n",
487 |     "from langchain.text_splitter import CharacterTextSplitter\n",
488 |     "from langchain.embeddings import OpenAIEmbeddings\n",
489 |     "from langchain.vectorstores import Chroma\n",
490 |     "\n",
491 |     "# define the document to retrieve\n",
492 |     "# here: the catalogue of standardized questions and answers\n",
493 |     "# in our case: the catalogue + User specific data (age, gender, etc.) + past conversations (granularity needs to be decided)\n",
494 |     "QA_catalogue = '/home/ll/data/mendu_chat/Mental_Health_FAQ.csv'\n",
495 |     "\n",
496 |     "# Define the columns we want to embed vs which ones we want in metadata\n",
497 |     "columns_to_embed = [\"Questions\",\"Answers\"]\n",
498 |     "columns_to_metadata = [\"Questions\",\"Answers\"]\n",
499 |     "\n",
500 |     "\n",
501 |     "# Process the CSV into the embeddable content vs. the metadata and put it into Document format so that we can chunk it into pieces.\n",
502 |     "docs = []\n",
503 |     "with open((QA_catalogue), newline=\"\", encoding='utf-8-sig') as csvfile:\n",
504 |     "    csv_reader = csv.DictReader(csvfile)\n",
505 |     "    for i, row in enumerate(csv_reader):\n",
506 |     "        to_metadata = {col: row[col] for col in columns_to_metadata if col in row}\n",
507 |     "        values_to_embed = {k: row[k] for k in columns_to_embed if k in row}\n",
508 |     "        to_embed = \"\\n\".join(f\"{k.strip()}: {v.strip()}\" for k, v in values_to_embed.items())\n",
509 |     "        newDoc = Document(page_content=to_embed, metadata=to_metadata)\n",
510 |     "        docs.append(newDoc)\n",
511 |     "\n",
512 |     "# Let's split the documents using character splitting.\n",
513 |     "splitter = CharacterTextSplitter(separator = \"\\n\",\n",
514 |     "                                chunk_size=500, \n",
515 |     "                                chunk_overlap=0,\n",
516 |     "                                length_function=len)\n",
517 |     "documents = splitter.split_documents(docs)\n",
518 |     "# Now that we have the chunks, we will generate the embeddings and insert the values into Chroma. Each vector inserted will have both the vector representation that will be used for similarity search as well as the metadata values we added.\n",
519 |     "\n",
520 |     "# Generate embeddings from documents and store in a vector database\n",
521 |     "embeddings_model = OpenAIEmbeddings()\n",
522 |     "db = Chroma.from_documents(documents, OpenAIEmbeddings())\n",
523 |     "# Query the vector for information.\n",
524 |     "query = \"I have no friends and feel lonely.\"\n",
525 |     "docs = db.similarity_search(query)\n",
526 |     "print(docs[0].page_content)\n",
527 |     "print(docs[0].metadata)"
528 |    ]
529 |   },
530 |   {
531 |    "cell_type": "code",
532 |    "execution_count": 5,
533 |    "metadata": {},
534 |    "outputs": [
535 |     {
536 |      "data": {
537 |       "text/plain": [
538 |        "[Document(page_content=\"Everyone feels lonely at times—maybe you recently moved to a new city, are changing your circle of friends, lost someone important in your life, or lost your job and also lost important social connections with coworkers. Other people may have physical connections to others but may feel like their emotional or social needs aren't met. Measures like social distancing or self-isolation can make loneliness feel worse no matter why you feel lonely now.\", metadata={'Answers': \"A lot of people are alone right now, but we don't have to be lonely. We're all in this together. \\n While you may be physically separated from friends, family members, and other loved ones, it has never been more important to maintain those social connections. Social connections are an opportunity to seek and share support, talk through difficult feelings, share a laugh, keep up-to-date with loved ones, and help each other cope. This pandemic is a lot for one person to deal with on their own. While measures like physical distancing and self-isolation are necessary to slow the spread of the virus, the physical separation can amplify a lot of challenging emotions like loneliness and fear. \\n Think about the different ways to connect that are most meaningful for you. For example, you might prefer a video chat over a phone call, or you might prefer to text throughout the day rather than one set time for a video call. Then, work with your social networks to make a plan. You might video chat with your close friends in the evening and phone a family member once a week. \\n Remember to be mindful of people who may not be online. Check in by phone and ask how you can help. \\n The quality of your social connections matter. Mindlessly scrolling through social media and liking a few posts usually doesn't build strong social connections. Make sure you focus on strategies that actually make you feel included and connected. 
If your current strategies don't help you feel connected, problem-solve to see if you can find a solution. \\n Everyone feels lonely at times—maybe you recently moved to a new city, are changing your circle of friends, lost someone important in your life, or lost your job and also lost important social connections with coworkers. Other people may have physical connections to others but may feel like their emotional or social needs aren't met. Measures like social distancing or self-isolation can make loneliness feel worse no matter why you feel lonely now. \\n Reach out to the connections you do have. Suggest ways to keep in touch and see if you can set a regular time to connect. People may hesitate to reach out for a lot of different reasons, so don't be afraid to be the one who asks. \\n Look for local community support groups and mutual aid groups on social media. This pandemic is bringing everyone together, so look for opportunities to make new connections. These groups are a great way to share your skills and abilities or seek help and support. \\n Look for specialized support groups. Support groups are moving online, and there are a lot of different support lines to call if you need to talk to someone. To find community services in BC, call or text 211 or visit www.bc211.ca. \\n If you need extra support, you can talk with a psychologist or counsellor for free: \\n You can access a free phone call with a Registered Psychologist though the Covid-19 Psychological Support Service from the BC Psychological Association. Visit www.psychologists.bc.ca/covid-19-resources. \\n You can access free, phone-based, short-term support with a counsellor from a new group called the BC COVID-19 Mental Health Network. Email bccovidtherapists@gmail.com to receive an appointment time. \\n For youth people ages 12-24, you can talk with a counsellor for free through Foundry Virtual. Visit foundrybc.ca/get-support/virtual/. \\n Call the BC Mental Health Support Line at 310-6789. 
It’s available 24/7. \\n Chat online with a Crisis Center volunteer at www.crisiscentrechat.ca (daily between noon and 1:00am) \\n For older adults: Call the Seniors Distress Line at 604-872-123 \\n For youth and young adults: Chat online with a volunteer at www.YouthinBC.com (daily between noon and 1:00am) \\n For children and youth: Call the Kids Help Phone at 1-800-668-6868 or visit kidshelpphone.ca \\n For tips on managing loneliness, check out the following resources: \\n Coping with Loneliness from the Canadian Mental Health Association: cmha.bc.ca/documents/coping-with-loneliness/ \\n Loneliness and Social Connection issue of Visions Journal at www.heretohelp.bc.ca/visions/loneliness-and-social-connection-vol14 \\n Wellness Module 3: Social Support at www.heretohelp.bc.ca/wellness-module/wellness-module-3-social-support\", 'Questions': 'How can I maintain social connections? What if I feel lonely?'}),\n",
539 |        " Document(page_content=\"Questions: How can I maintain social connections? What if I feel lonely?\\nAnswers: A lot of people are alone right now, but we don't have to be lonely. We're all in this together.\", metadata={'Answers': \"A lot of people are alone right now, but we don't have to be lonely. We're all in this together. \\n While you may be physically separated from friends, family members, and other loved ones, it has never been more important to maintain those social connections. Social connections are an opportunity to seek and share support, talk through difficult feelings, share a laugh, keep up-to-date with loved ones, and help each other cope. This pandemic is a lot for one person to deal with on their own. While measures like physical distancing and self-isolation are necessary to slow the spread of the virus, the physical separation can amplify a lot of challenging emotions like loneliness and fear. \\n Think about the different ways to connect that are most meaningful for you. For example, you might prefer a video chat over a phone call, or you might prefer to text throughout the day rather than one set time for a video call. Then, work with your social networks to make a plan. You might video chat with your close friends in the evening and phone a family member once a week. \\n Remember to be mindful of people who may not be online. Check in by phone and ask how you can help. \\n The quality of your social connections matter. Mindlessly scrolling through social media and liking a few posts usually doesn't build strong social connections. Make sure you focus on strategies that actually make you feel included and connected. If your current strategies don't help you feel connected, problem-solve to see if you can find a solution. 
\\n Everyone feels lonely at times—maybe you recently moved to a new city, are changing your circle of friends, lost someone important in your life, or lost your job and also lost important social connections with coworkers. Other people may have physical connections to others but may feel like their emotional or social needs aren't met. Measures like social distancing or self-isolation can make loneliness feel worse no matter why you feel lonely now. \\n Reach out to the connections you do have. Suggest ways to keep in touch and see if you can set a regular time to connect. People may hesitate to reach out for a lot of different reasons, so don't be afraid to be the one who asks. \\n Look for local community support groups and mutual aid groups on social media. This pandemic is bringing everyone together, so look for opportunities to make new connections. These groups are a great way to share your skills and abilities or seek help and support. \\n Look for specialized support groups. Support groups are moving online, and there are a lot of different support lines to call if you need to talk to someone. To find community services in BC, call or text 211 or visit www.bc211.ca. \\n If you need extra support, you can talk with a psychologist or counsellor for free: \\n You can access a free phone call with a Registered Psychologist though the Covid-19 Psychological Support Service from the BC Psychological Association. Visit www.psychologists.bc.ca/covid-19-resources. \\n You can access free, phone-based, short-term support with a counsellor from a new group called the BC COVID-19 Mental Health Network. Email bccovidtherapists@gmail.com to receive an appointment time. \\n For youth people ages 12-24, you can talk with a counsellor for free through Foundry Virtual. Visit foundrybc.ca/get-support/virtual/. \\n Call the BC Mental Health Support Line at 310-6789. It’s available 24/7. 
\\n Chat online with a Crisis Center volunteer at www.crisiscentrechat.ca (daily between noon and 1:00am) \\n For older adults: Call the Seniors Distress Line at 604-872-123 \\n For youth and young adults: Chat online with a volunteer at www.YouthinBC.com (daily between noon and 1:00am) \\n For children and youth: Call the Kids Help Phone at 1-800-668-6868 or visit kidshelpphone.ca \\n For tips on managing loneliness, check out the following resources: \\n Coping with Loneliness from the Canadian Mental Health Association: cmha.bc.ca/documents/coping-with-loneliness/ \\n Loneliness and Social Connection issue of Visions Journal at www.heretohelp.bc.ca/visions/loneliness-and-social-connection-vol14 \\n Wellness Module 3: Social Support at www.heretohelp.bc.ca/wellness-module/wellness-module-3-social-support\", 'Questions': 'How can I maintain social connections? What if I feel lonely?'}),\n",
540 |        " Document(page_content='Seek support. Grief can feel very isolating, even though a lot of people are experiencing some sort of loss right now. Reach out to friends or family and share your feelings. Look for ways to help and support each other.', metadata={'Answers': \"While a lot of people think of grief in terms of losing a person or pet, grief can come up whenever you lose something important. This includes: \\n Losing security, like losing your job or wondering how long you'll be able to pay rent \\n Losing stability or routine, like finding yourself working from home or navigating childcare closures \\n Losing your sense of safety, like fearing you or someone you love might end up with COVID-19 \\n Losing your social relationships, like missing time with family and friends now that everyone must practice physical distancing or self-isolation \\n Losing hope for the future, like feeling that life will never go back to normal \\n Losing important goals, like finding your classes, sports competitions, or performances are cancelled for the foreseeable future \\n Losing important milestone celebrations like graduation ceremonies and weddings \\n Grief bring up complicated feelings. You might feel sad, angry, frustrated, fearful, or hopeless. You may have a hard time eating or sleeping, or feel very tense. You may feel overwhelmed and tired. You may wonder if life will ever feel normal again. \\n Everyone grieves in their own way and their own time. Here are some strategies to try as you navigate your own journey. \\n Acknowledge and express your feelings in a healthy way. Give your feelings a name and find healthy ways to express them, such as by talking with a friend, writing in a journal, or making art. \\n Give yourself as much time as you need. Grief follows its own schedule. Give yourself permission to use this time to take care of your well-being. Let go of expectations, tasks, or other obligations that can wait. \\n Seek support. 
Grief can feel very isolating, even though a lot of people are experiencing some sort of loss right now. Reach out to friends or family and share your feelings. Look for ways to help and support each other. \\n Take care of yourself. Ignoring health and well-being can make difficult experiences feel worse. Eat as well as you can, try to get enough sleep, spend time outside if it's safe for you to do so, and exercise regularly. Think about self-care activities or strategies that have helped you cope with challenging situations in the past and make time for those activities. \\n Know that feelings of grief will pass. Grief may feel intense at times, but those feelings will become more manageable over time and will eventually pass. \\n Connect with a mental health professional if you're having a hard time. If you're having a hard time getting through the day, coping in unhealthy ways, or having a hard time managing difficult thoughts or feelings, it's a good idea to seek help from a professional like a psychologist or counsellor—many now offer online or phone appointments. 
To find help: \\n For everyone \\n BC Psychological Association: Find a Registered Psychologist at www.psychologists.bc.ca/find_psychologist \\n BC Association of Clinical Counsellors: Find a Registered Clinical Counsellor at bc-counsellors.org \\n BC Mental Health Support Line: call 310-6789 (no area code) for to learn about services in your area or just to talk with someone right now \\n For young people \\n Kids Help Phone: Talk to a counsellor at 1-800-668-6868 or chat at kidshelpphone.ca (available 24/7) \\n Foundry: Contact your local Foundry office at foundrybc.ca for Foundy Virtual and information about local resources (for youth ages 12-24) \\n Youth in BC: Chat with a crisis line responder at youthinbc.com (available every day from noon – 1:00am) \\n That Discomfort You're Feeling Is Grief in Harvard Business Review at hbr.org/2020/03/that-discomfort-youre-feeling-is-grief \\n Grieving from the Canadian Mental Health Association at cmha.ca/documents/grieving \\n Coping with Grief and Loss from Mind Your Mind at mindyourmind.ca/wellness/coping-grief-and-loss\", 'Questions': 'How can I manage grief?'}),\n",
541 |        " Document(page_content='While you may be physically separated from friends, family members, and other loved ones, it has never been more important to maintain those social connections. Social connections are an opportunity to seek and share support, talk through difficult feelings, share a laugh, keep up-to-date with loved ones, and help each other cope. This pandemic is a lot for one person to deal with on their own. While measures like physical distancing and self-isolation are necessary to slow the spread of the virus, the physical separation can amplify a lot of challenging emotions like loneliness and fear.', metadata={'Answers': \"A lot of people are alone right now, but we don't have to be lonely. We're all in this together. \\n While you may be physically separated from friends, family members, and other loved ones, it has never been more important to maintain those social connections. Social connections are an opportunity to seek and share support, talk through difficult feelings, share a laugh, keep up-to-date with loved ones, and help each other cope. This pandemic is a lot for one person to deal with on their own. While measures like physical distancing and self-isolation are necessary to slow the spread of the virus, the physical separation can amplify a lot of challenging emotions like loneliness and fear. \\n Think about the different ways to connect that are most meaningful for you. For example, you might prefer a video chat over a phone call, or you might prefer to text throughout the day rather than one set time for a video call. Then, work with your social networks to make a plan. You might video chat with your close friends in the evening and phone a family member once a week. \\n Remember to be mindful of people who may not be online. Check in by phone and ask how you can help. \\n The quality of your social connections matter. 
Mindlessly scrolling through social media and liking a few posts usually doesn't build strong social connections. Make sure you focus on strategies that actually make you feel included and connected. If your current strategies don't help you feel connected, problem-solve to see if you can find a solution. \\n Everyone feels lonely at times—maybe you recently moved to a new city, are changing your circle of friends, lost someone important in your life, or lost your job and also lost important social connections with coworkers. Other people may have physical connections to others but may feel like their emotional or social needs aren't met. Measures like social distancing or self-isolation can make loneliness feel worse no matter why you feel lonely now. \\n Reach out to the connections you do have. Suggest ways to keep in touch and see if you can set a regular time to connect. People may hesitate to reach out for a lot of different reasons, so don't be afraid to be the one who asks. \\n Look for local community support groups and mutual aid groups on social media. This pandemic is bringing everyone together, so look for opportunities to make new connections. These groups are a great way to share your skills and abilities or seek help and support. \\n Look for specialized support groups. Support groups are moving online, and there are a lot of different support lines to call if you need to talk to someone. To find community services in BC, call or text 211 or visit www.bc211.ca. \\n If you need extra support, you can talk with a psychologist or counsellor for free: \\n You can access a free phone call with a Registered Psychologist though the Covid-19 Psychological Support Service from the BC Psychological Association. Visit www.psychologists.bc.ca/covid-19-resources. \\n You can access free, phone-based, short-term support with a counsellor from a new group called the BC COVID-19 Mental Health Network. 
Email bccovidtherapists@gmail.com to receive an appointment time. \\n For youth people ages 12-24, you can talk with a counsellor for free through Foundry Virtual. Visit foundrybc.ca/get-support/virtual/. \\n Call the BC Mental Health Support Line at 310-6789. It’s available 24/7. \\n Chat online with a Crisis Center volunteer at www.crisiscentrechat.ca (daily between noon and 1:00am) \\n For older adults: Call the Seniors Distress Line at 604-872-123 \\n For youth and young adults: Chat online with a volunteer at www.YouthinBC.com (daily between noon and 1:00am) \\n For children and youth: Call the Kids Help Phone at 1-800-668-6868 or visit kidshelpphone.ca \\n For tips on managing loneliness, check out the following resources: \\n Coping with Loneliness from the Canadian Mental Health Association: cmha.bc.ca/documents/coping-with-loneliness/ \\n Loneliness and Social Connection issue of Visions Journal at www.heretohelp.bc.ca/visions/loneliness-and-social-connection-vol14 \\n Wellness Module 3: Social Support at www.heretohelp.bc.ca/wellness-module/wellness-module-3-social-support\", 'Questions': 'How can I maintain social connections? What if I feel lonely?'})]"
542 |       ]
543 |      },
544 |      "execution_count": 5,
545 |      "metadata": {},
546 |      "output_type": "execute_result"
547 |     }
548 |    ],
549 |    "source": [
550 |     "from langchain.llms import OpenAI\n",
551 |     "from langchain.retrievers.self_query.base import SelfQueryRetriever\n",
552 |     "from langchain.chains.query_constructor.base import AttributeInfo\n",
553 |     "\n",
554 |     "# Metadata schema based on the values on the CSV\n",
555 |     "metadata_field_info = [\n",
556 |     "    AttributeInfo(\n",
557 |     "        name=\"Questions\",\n",
558 |     "        description=\"A question the user or patient might ask\",\n",
559 |     "        type=\"string\",\n",
560 |     "    ),\n",
561 |     "    AttributeInfo(\n",
562 |     "        name=\"Answers\",\n",
563 |     "        description=\"One or more answers to the question asked by the user or patient\",\n",
564 |     "        type=\"string\",\n",
565 |     "    ),\n",
566 |     "]\n",
567 |     "document_content_description = \"standardized question and answer catalogue for mental health\"\n",
568 |     "\n",
569 |     "# Configure retriever\n",
570 |     "llm = OpenAI(temperature=0)\n",
571 |     "retriever = SelfQueryRetriever.from_llm(\n",
572 |     "    llm, db, document_content_description, metadata_field_info, verbose=True)\n",
573 |     "\n",
574 |     "\n",
575 |     "# Retrieve values\n",
576 |     "retriever.get_relevant_documents(\"I feel lonely\")\n",
577 |     "\n",
578 |     "# NOTE: the sample result below is left over from a product-catalogue example; it is not the output of this query:\n",
579 |     "# Description: Achieve your fitness goals with our Fitness Tracker Smartwatch. Monitor your activity, heart rate, and receive notifications on your wrist.\\n\n",
580 |     "# Features: - Heart rate monitor and activity tracking.<br> - Built-in GPS for accurate workout tracking.<br> - Sleep analysis and guided breathing exercises.<br> - Receive notifications from your smartphone.<br> - Water-resistant for workouts and everyday use.<br> - Long battery life.', \n",
581 |     "# metadata={\n",
582 |     "#    'Product Name': 'Fitness Tracker Smartwatch', \n",
583 |     "#    'Price': '149.99', \n",
584 |     "#    'Rating': '4.5', \n",
585 |     "#    'Description': 'Achieve your fitness goals with our Fitness Tracker Smartwatch. Monitor your activity, heart rate, and receive notifications on your wrist.', \n",
586 |     "#    'Features': '- Heart rate monitor and activity tracking.<br> - Built-in GPS for accurate workout tracking.<br> - Sleep analysis and guided breathing exercises.<br> - Receive notifications from your smartphone.<br> - Water-resistant for workouts and everyday use.<br> - Long battery life.'\n",
587 |     "# }"
588 |    ]
589 |   }
590 |  ],
591 |  "metadata": {
592 |   "kernelspec": {
593 |    "display_name": "mendu_chat",
594 |    "language": "python",
595 |    "name": "python3"
596 |   },
597 |   "language_info": {
598 |    "codemirror_mode": {
599 |     "name": "ipython",
600 |     "version": 3
601 |    },
602 |    "file_extension": ".py",
603 |    "mimetype": "text/x-python",
604 |    "name": "python",
605 |    "nbconvert_exporter": "python",
606 |    "pygments_lexer": "ipython3",
607 |    "version": "3.10.13"
608 |   }
609 |  },
610 |  "nbformat": 4,
611 |  "nbformat_minor": 2
612 | }
613 | 


--------------------------------------------------------------------------------
/rag_context.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import faiss
 3 | from transformers import AutoModel
 4 | 
# Name substituted for every occurrence of 'client' when load_data reads the transcript.
user_name = 'Jill'
# CSV file with the guideline questions and answers (load_data uses the columns at positions 1 and 2).
guideline_path = './data/Mental_Health_FAQ.csv'
# Text file with the past conversation that load_data reads and splits into turns.
past_conversation_path = "./data/transcript2"
# Identifier of the embedding model handed to AutoModel.from_pretrained.
embedding_model_name = 'jinaai/jina-embeddings-v2-small-en'
# Loaded at import time and used as the default model of retrieve_similar_sentences.
embedding_model = AutoModel.from_pretrained(embedding_model_name, trust_remote_code=True)
10 | 
def save_embeddings(data, embedding_model, index_path="embeddings.index"):
    """Embed ``data`` and write a flat L2 FAISS index of the vectors to disk.

    Args:
        data: A sentence or a non-empty sequence of sentences to embed.
        embedding_model: Either a model name/path accepted by
            ``AutoModel.from_pretrained`` or an already loaded model object
            that exposes ``encode``. Both forms are accepted because the
            ``__main__`` block of this module passes the loaded model.
        index_path: Destination file of the index. Defaults to
            ``"embeddings.index"``, the path that used to be hard-coded.

    Returns:
        Whatever ``faiss.write_index`` returns.

    Raises:
        ValueError: If ``data`` is empty.
    """
    import os  # local import: only needed to recognise path-like model locations

    if len(data) == 0:
        raise ValueError("data must contain at least one sentence to embed")

    if isinstance(embedding_model, (str, os.PathLike)):
        # trust_remote_code is needed to use the encode method
        model = AutoModel.from_pretrained(embedding_model, trust_remote_code=True)
    else:
        # Already a loaded model: use it directly instead of passing an
        # object to from_pretrained, which expects a name or path.
        model = embedding_model

    embeddings = model.encode(data)
    # Encoding a single sentence can give a 1-D vector (retrieve_similar_sentences
    # guards against the same case); the index needs a (n_vectors, dim) array.
    if len(embeddings.shape) == 1:
        embeddings = embeddings.reshape(1, -1)

    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)

    # save the embeddings to disk
    return faiss.write_index(index, index_path)
23 | 
def load_data(guideline_path, past_conversation_path, user_name):
    """Load the guideline and past conversation.

    Args:
        guideline_path: Path to a CSV file. The columns at positions 1 and 2
            (question and answer) are concatenated into one string per row.
        past_conversation_path: Path to a text transcript in which the
            therapist's turns are introduced by 'Therapist:' and the user's
            turns by 'client:'.
        user_name: Name that replaces every occurrence of 'client' in the
            transcript and labels the user's turns.

    Returns:
        A ``(user_background, combined_infos)`` tuple. ``user_background`` is a
        one-element list holding the text before the first 'Therapist:'.
        ``combined_infos`` is the list of question+answer strings followed by
        the labelled dialogue turns in their original order.
    """
    ## Load the general guideline from a .csv file
    qa_df = pd.read_csv(guideline_path)

    # Select the question and answer columns. Empty cells are read as NaN
    # (a float), which str.join rejects, so they are turned into ''.
    qa_columns = qa_df.iloc[:, [1, 2]].fillna('').astype(str)
    row_list = [''.join(row) for row in qa_columns.itertuples(index=False, name=None)]

    ## load client information, past conversation, and process the conversation
    with open(past_conversation_path, "r", encoding="utf-8") as file:
        file_content = file.read()

    # Replace client with the user's name and flatten newlines and tabs
    updated_content = file_content.replace('client', user_name).replace('\n', ' ').replace('\t', ' ')

    # Split the content into general information and dialogues
    segments = updated_content.split('Therapist:')
    user_background, dialogues = [segments[0]], segments[1:]

    # Process each dialogue. Each segment starts with the therapist's turn;
    # whatever follows a '<user_name>:' tag belongs to the user. Attribution is
    # positional because both halves normally start with a space, and the tag is
    # built from user_name so names other than 'Jill' are split correctly.
    user_tag = f'{user_name}:'
    processed_dialogues = []
    for dialogue in dialogues:
        therapist_part, *user_parts = dialogue.split(user_tag)
        processed_dialogues.append('Therapist:' + therapist_part)
        processed_dialogues.extend(user_tag + part for part in user_parts)

    ## concatenate all contents
    combined_infos = row_list + processed_dialogues
    return user_background, combined_infos
56 | 
def retrieve_similar_sentences(query, index, k=3, filter_distance=41, model = embedding_model):
    """Look up the indexed entries closest to ``query``.

    The query is encoded with ``model``, the ``k`` nearest entries are fetched
    from ``index`` and only hits whose distance is strictly below
    ``filter_distance`` are kept.

    Returns:
        A ``(distances, indices)`` pair of equally long lists, in the order
        returned by ``index.search``.
    """
    encoded_query = model.encode(f'{query}')
    # A lone vector is promoted to a single-row 2-D array before searching
    if len(encoded_query.shape) == 1:
        encoded_query = encoded_query.reshape(1, -1)
    distances, positions = index.search(encoded_query, k)

    # Keep only the hits closer than filter_distance (one pass over both rows)
    close_hits = [
        (distance, position)
        for distance, position in zip(distances[0], positions[0])
        if distance < filter_distance
    ]
    filtered_distances = [distance for distance, _ in close_hits]
    filtered_indices = [position for _, position in close_hits]
    return filtered_distances, filtered_indices
67 | 
if __name__ == '__main__':
    # Load the data
    user_background, combined_infos = load_data(guideline_path, past_conversation_path, user_name)

    # Save the embeddings. Pass the model *name*: save_embeddings resolves it
    # through AutoModel.from_pretrained, which expects a name or path rather
    # than the already loaded model object.
    save_embeddings(combined_infos, embedding_model_name)
74 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | openai
2 | python-dotenv
3 | transformers
4 | faiss-cpu
5 | pandas
6 | numpy
7 | torch


--------------------------------------------------------------------------------