├── Me_Bot.ipynb
├── README.md
├── clean_wechat_chats.py
├── clean_whatsapp_chats.py
├── pictures
│   └── MM.sqlite.png
└── prepare_files.ipynb

/Me_Bot.ipynb:
--------------------------------------------------------------------------------
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:Using /tmp/tfhub_modules to cache modules.\n",
      "SentencePiece model loaded at b'/tmp/tfhub_modules/539544f0a997d91c327c23285ea00c37588d92cc/assets/universal_encoder_8k_spm.model'.\n"
     ]
    }
   ],
   "source": [
    "import sys\n",
    "sys.path.append('/usr/local/lib/python3.5/dist-packages/')\n",
    "import tensorflow as tf\n",
    "import tensorflow_hub as hub\n",
    "import numpy as np\n",
    "import os\n",
    "import http.client, urllib.request, urllib.parse, urllib.error, base64\n",
    "import json\n",
    "import warnings\n",
    "warnings.filterwarnings(\"ignore\")\n",
    "import pickle\n",
    "import sentencepiece as spm\n",
    "\n",
    "media_app = 'whatsapp'  # change to 'wechat' if you cleaned WeChat chats\n",
    "\n",
    "# Load the Universal Sentence Encoder Lite module and build the encoding graph.\n",
    "module_url = \"https://tfhub.dev/google/universal-sentence-encoder-lite/2\"\n",
    "module = hub.Module(module_url)\n",
    "tf.logging.set_verbosity(tf.logging.WARN)\n",
    "\n",
    "input_placeholder = tf.sparse_placeholder(tf.int64, shape=[None, None])\n",
    "encodings = module(\n",
    "    inputs=dict(\n",
    "        values=input_placeholder.values,\n",
    "        indices=input_placeholder.indices,\n",
    "        dense_shape=input_placeholder.dense_shape))\n",
    "\n",
    "with tf.Session() as sess:\n",
    "    spm_path = sess.run(module(signature=\"spm_path\"))\n",
    "\n",
    "sp = spm.SentencePieceProcessor()\n",
    "sp.Load(spm_path)\n",
    "print(\"SentencePiece model loaded at {}.\".format(spm_path))\n",
    "\n",
    "def process_to_IDs_in_sparse_format(sp, sentences):\n",
    "    # A utility method that processes sentences with the SentencePiece processor\n",
    "    # 'sp' and returns the results in tf.SparseTensor-like format:\n",
    "    # (values, indices, dense_shape)\n",
    "    ids = [sp.EncodeAsIds(x) for x in sentences]\n",
    "    max_len = max(len(x) for x in ids)\n",
    "    dense_shape = (len(ids), max_len)\n",
    "    values = [item for sublist in ids for item in sublist]\n",
    "    indices = [[row, col] for row in range(len(ids)) for col in range(len(ids[row]))]\n",
    "    return (values, indices, dense_shape)\n",
    "\n",
    "def embed_sentence_lite(sentences):\n",
    "    # Embed a list of sentences with the Universal Sentence Encoder Lite.\n",
    "    values, indices, dense_shape = process_to_IDs_in_sparse_format(sp, sentences)\n",
    "\n",
    "    # Reduce logging output.\n",
    "    tf.logging.set_verbosity(tf.logging.ERROR)\n",
    "\n",
    "    with tf.Session() as session:\n",
    "        session.run([tf.global_variables_initializer(), tf.tables_initializer()])\n",
    "        message_embeddings = session.run(\n",
    "            encodings,\n",
    "            feed_dict={input_placeholder.values: values,\n",
    "                       input_placeholder.indices: indices,\n",
    "                       input_placeholder.dense_shape: dense_shape})\n",
    "\n",
    "    return message_embeddings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def find_closest(sentence_rep, query_rep, K):\n",
    "    # Indices of the K rows of sentence_rep closest to query_rep in Euclidean distance.\n",
    "    top_K = np.argsort(np.sqrt(np.sum(np.square(sentence_rep - query_rep), axis=1)))[:K]\n",
    "    return top_K"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "with open('res/' + media_app + '/other_embeddings.p', 'rb') as f:\n",
    "    other_embeddings = pickle.load(f)\n",
    "\n",
    "with open('res/' + media_app + '/your_embeddings.p', 'rb') as f:\n",
    "    your_embeddings = pickle.load(f)\n",
    "\n",
    "# Mapping from the other person's message to the reply you gave,\n",
    "# built by the cleaning scripts.\n",
    "with open('res/' + media_app + '/dialogues.p', 'rb') as f:\n",
    "    pr_to_sp = pickle.load(f)\n",
    "\n",
    "with open('res/' + media_app + '/your_sents.p', 'rb') as f:\n",
    "    your_sentences = pickle.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "keys = list(pr_to_sp.keys())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "with open('res/' + media_app + '/key_embeddings.p', 'rb') as f:\n",
    "    key_embeddings = pickle.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def speak_like_me(query, K, your_embeddings, your_sentences):\n",
    "    # Print the K sentences you have written that are closest to the query.\n",
    "    query_embedding = embed_sentence_lite([query])\n",
    "    closest_your = find_closest(your_embeddings, query_embedding, K)\n",
    "    for cl in closest_your:\n",
    "        print(your_sentences[cl])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def respond_like_me(query, K, key_embeddings, keys):\n",
    "    # Find the other person's K + 2 messages closest to the query, skip the\n",
    "    # two closest (they tend to be near-duplicates of the query itself), and\n",
    "    # print the replies you gave to the rest.\n",
    "    query_embedding = embed_sentence_lite([query])\n",
    "    closest_other = find_closest(key_embeddings, query_embedding, K + 2)\n",
    "    for k in closest_other[2:]:\n",
    "        print(pr_to_sp[keys[k]])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Work time now\n",
      "\n",
      "Potty :P\n",
      "\n",
      "Probably the first time you'll hear me say jt\n",
      "\n"
     ]
    }
   ],
   "source": [
    "respond_like_me(\"What's up?\", 4, key_embeddings, keys)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "So so hungry\n",
      "\n",
      "Reeeaaaallly hungry\n",
      "\n",
      "I am in the mood to eat\n",
      "\n",
      "I want to eat that so badly. 😣\n",
      "\n",
      "I want that food\n",
      "\n"
     ]
    }
   ],
   "source": [
    "speak_like_me(\"I am so hungry\", 5, your_embeddings, your_sentences)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Me_Bot
A simple tool to make a bot that speaks like you, by simply learning from your WhatsApp chats.

Instructions:

1. From WhatsApp on your phone, open any chat and export it from the chat settings. Move the txt file you receive into the Me_Bot folder.

2. Set YOUR_NAME and OTHER_NAME in clean_whatsapp_chats.py to the names as they appear in your exported txt file, then run the script with:

`python clean_whatsapp_chats.py whatsapp_chat.txt`
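The script splits each line on the `' NAME: '` separator, so it assumes the export looks roughly like the snippet below. The timestamp format varies with phone locale and WhatsApp version, so treat this as an illustration rather than the exact format:

```
[25/06/18, 9:41:07 PM] YOUR_NAME: Want to grab dinner?
[25/06/18, 9:42:13 PM] OTHER_NAME: Sure, where?
```

Lines containing 'Missed Voice Call' or 'image omitted' are skipped, and lines without a `' NAME: '` separator are appended to the previous message as continuations.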
3. Run the prepare_files.ipynb notebook.

4. Run the Me_Bot.ipynb notebook and play with the bot at the bottom!

NOTE - Actively seeking collaborators for fun side projects like this. If you're interested, please drop me a mail at smadan@mit.edu

## For WeChat users:
WeChat chat history is saved in an SQLite database, so you need to export it from your phone.
If you have an iPhone, these are the steps to get the database file:
1. Use iTunes to back up your phone (leave "encrypt backup" unselected)
2. Use iTools to open the backup and copy out the database file named MM.sqlite
3. Run `python clean_wechat_chats.py YOUR_DATABASE_PATH YOUR_FRIEND_ID`

You can find more detailed information about this [here](https://www.cnblogs.com/cxun/p/5677606.html)

![MM.sqlite](https://github.com/DH-Diego/Me_Bot/blob/master/pictures/MM.sqlite.png)

This is the database. Each table whose name starts with 'Chat_' holds the chat history with one friend; find the id of the table you want to build the bot from and pass it to the script above. Currently only English chat is supported; a Chinese version is under construction.
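## How it works

There is no generative model here: every sentence from your chats is embedded with the Universal Sentence Encoder Lite, and the bot answers a query by nearest-neighbour search over those embeddings. `speak_like_me` returns your own sentences closest to the query, while `respond_like_me` finds the other person's messages closest to the query and prints the replies you gave to them, using the dialogue pairs saved by the cleaning scripts. Below is a minimal sketch of the retrieval step, with tiny made-up vectors standing in for the real 512-dimensional embeddings:

```python
import numpy as np

def find_closest(sentence_rep, query_rep, K):
    # Rank stored embeddings by Euclidean distance to the query; return the K best.
    return np.argsort(np.sqrt(np.sum(np.square(sentence_rep - query_rep), axis=1)))[:K]

# Toy stand-ins: four stored "sentences" embedded in three dimensions.
sentence_rep = np.array([[0.9, 0.1, 0.0],
                         [0.0, 1.0, 0.2],
                         [0.8, 0.2, 0.1],
                         [0.1, 0.0, 0.9]])
query_rep = np.array([[1.0, 0.0, 0.0]])

print(find_closest(sentence_rep, query_rep, K=2))  # -> [0 2], the two closest rows
```

This is the same `find_closest` used in both notebooks; only the embeddings differ.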
--------------------------------------------------------------------------------
/clean_wechat_chats.py:
--------------------------------------------------------------------------------
import os
import pickle
import sqlite3
import sys

chat_db_file = sys.argv[1]
friend_id = sys.argv[2]
conn = sqlite3.connect(chat_db_file)
cursor = conn.cursor()
cursor.execute('select CreateTime, Message, Status, Type from ' + friend_id)
content = cursor.fetchall()
all_text = []
your_sents = []
other_sents = []

prev_pr_to_sp = {}
prev = None
for line in content:
    createTime = line[0]
    message = line[1]  # message content
    status = line[2]   # status 2 or 3: message sent by you (from phone or computer); 4 or 5: message from the other person
    type_ = line[3]    # type 1: text message; type 47: emoji (not sure); type 10000: link (not sure)

    if type_ != 1:
        continue
    if status == 2 or status == 3:
        your_sents.append(message)
        all_text.append(message)
        # If the other person spoke last, record this message as your reply to theirs.
        if prev == 'pr':
            prev_pr_to_sp[other_sents[-1]] = message
        prev = 'sp'
    elif status == 4 or status == 5:
        other_sents.append(message)
        all_text.append(message)
        prev = 'pr'
    else:
        # Unknown status: treat the row as a continuation of the previous message.
        print(line)
        all_text[-1] += message
        if prev == 'sp':
            your_sents[-1] += message
        elif prev == 'pr':
            other_sents[-1] += message

os.makedirs('res/wechat', exist_ok=True)

with open('res/wechat/dialogues.p', 'wb') as f:
    pickle.dump(prev_pr_to_sp, f)

with open('res/wechat/all_text.p', 'wb') as f:
    pickle.dump(all_text, f)

with open('res/wechat/your_sents.p', 'wb') as f:
    pickle.dump(your_sents, f)

with open('res/wechat/other_sents.p', 'wb') as f:
    pickle.dump(other_sents, f)
--------------------------------------------------------------------------------
/clean_whatsapp_chats.py:
--------------------------------------------------------------------------------
import os
import pickle
import sys

chat_file = sys.argv[1]

with open(chat_file, 'r', encoding="utf8") as f:
    content = f.readlines()
all_text = []
your_sents = []
other_sents = []

YOUR_NAME = 'YOUR NAME HERE'
OTHER_NAME = 'OTHER NAME HERE'

prev_pr_to_sp = {}
prev = None
for line in content[1:]:
    if 'Missed Voice Call' in line:
        continue
    if 'image omitted' in line:
        continue
    if ' %s: ' % YOUR_NAME in line:
        text = line.split(' %s: ' % YOUR_NAME)[-1]
        your_sents.append(text)
        all_text.append(text)
        # If the other person spoke last, record this message as your reply to theirs.
        if prev == 'pr':
            prev_pr_to_sp[other_sents[-1]] = text
        prev = 'sp'
    elif ' %s: ' % OTHER_NAME in line:
        text = line.split(' %s: ' % OTHER_NAME)[-1]
        other_sents.append(text)
        all_text.append(text)
        prev = 'pr'
    else:
        # No name marker: treat the line as a continuation of the previous message.
        print(line)
        all_text[-1] += line
        if prev == 'sp':
            your_sents[-1] += line
        elif prev == 'pr':
            other_sents[-1] += line

os.makedirs('res/whatsapp', exist_ok=True)

with open('res/whatsapp/dialogues.p', 'wb') as f:
    pickle.dump(prev_pr_to_sp, f)

with open('res/whatsapp/all_text.p', 'wb') as f:
    pickle.dump(all_text, f)

with open('res/whatsapp/your_sents.p', 'wb') as f:
    pickle.dump(your_sents, f)

with open('res/whatsapp/other_sents.p', 'wb') as f:
    pickle.dump(other_sents, f)
--------------------------------------------------------------------------------
/pictures/MM.sqlite.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Spandan-Madan/Me_Bot/56d60c6925d8aaadcc8122db1dbc6cfdd362389a/pictures/MM.sqlite.png
--------------------------------------------------------------------------------
/prepare_files.ipynb:
--------------------------------------------------------------------------------
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:Using /tmp/tfhub_modules to cache modules.\n",
      "SentencePiece model loaded at b'/tmp/tfhub_modules/539544f0a997d91c327c23285ea00c37588d92cc/assets/universal_encoder_8k_spm.model'.\n"
     ]
    }
   ],
   "source": [
    "import sys\n",
    "sys.path.append('/usr/local/lib/python3.5/dist-packages/')\n",
    "import tensorflow as tf\n",
    "import tensorflow_hub as hub\n",
    "import numpy as np\n",
    "import os\n",
    "import http.client, urllib.request, urllib.parse, urllib.error, base64\n",
    "import json\n",
    "import warnings\n",
    "warnings.filterwarnings(\"ignore\")\n",
    "import pickle\n",
    "import sentencepiece as spm\n",
    "\n",
    "media_app = 'whatsapp'  # change to 'wechat' if you cleaned WeChat chats\n",
    "module_url = \"https://tfhub.dev/google/universal-sentence-encoder-lite/2\"\n",
    "embed = hub.Module(module_url)\n",
    "tf.logging.set_verbosity(tf.logging.WARN)\n",
    "\n",
    "module = hub.Module(module_url)\n",
    "input_placeholder = tf.sparse_placeholder(tf.int64, shape=[None, None])\n",
    "encodings = module(\n",
    "    inputs=dict(\n",
    "        values=input_placeholder.values,\n",
    "        indices=input_placeholder.indices,\n",
    "        dense_shape=input_placeholder.dense_shape))\n",
    "\n",
    "with tf.Session() as sess:\n",
    "    spm_path = sess.run(module(signature=\"spm_path\"))\n",
    "\n",
    "sp = spm.SentencePieceProcessor()\n",
    "sp.Load(spm_path)\n",
    "print(\"SentencePiece model loaded at {}.\".format(spm_path))\n",
    "\n",
    "def process_to_IDs_in_sparse_format(sp, sentences):\n",
    "    # A utility method that processes sentences with the SentencePiece processor\n",
    "    # 'sp' and returns the results in tf.SparseTensor-like format:\n",
    "    # (values, indices, dense_shape)\n",
    "    ids = [sp.EncodeAsIds(x) for x in sentences]\n",
    "    max_len = max(len(x) for x in ids)\n",
    "    dense_shape = (len(ids), max_len)\n",
    "    values = [item for sublist in ids for item in sublist]\n",
    "    indices = [[row, col] for row in range(len(ids)) for col in range(len(ids[row]))]\n",
    "    return (values, indices, dense_shape)\n",
    "\n",
    "def embed_sentence_lite(sentences):\n",
    "    # Embed a list of sentences with the Universal Sentence Encoder Lite.\n",
    "    values, indices, dense_shape = process_to_IDs_in_sparse_format(sp, sentences)\n",
    "\n",
    "    # Reduce logging output.\n",
    "    tf.logging.set_verbosity(tf.logging.ERROR)\n",
    "\n",
    "    with tf.Session() as session:\n",
    "        session.run([tf.global_variables_initializer(), tf.tables_initializer()])\n",
    "        message_embeddings = session.run(\n",
    "            encodings,\n",
    "            feed_dict={input_placeholder.values: values,\n",
    "                       input_placeholder.indices: indices,\n",
    "                       input_placeholder.dense_shape: dense_shape})\n",
    "\n",
    "    return message_embeddings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def find_closest(sentence_rep, query_rep, K):\n",
    "    # Indices of the K rows of sentence_rep closest to query_rep in Euclidean distance.\n",
    "    top_K = np.argsort(np.sqrt(np.sum(np.square(sentence_rep - query_rep), axis=1)))[:K]\n",
    "    return top_K"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def embed_sentences(sentences):\n",
    "    # Note: this helper expects an encoder that takes raw strings (the full,\n",
    "    # non-lite Universal Sentence Encoder); it is unused in this notebook.\n",
    "    with tf.Session() as session:\n",
    "        session.run([tf.global_variables_initializer(), tf.tables_initializer()])\n",
    "        message_embeddings = session.run(embed(sentences))\n",
    "\n",
    "    return message_embeddings\n",
    "\n",
    "def get_sentiments(json_body):\n",
    "    # Optional helper for the Microsoft Text Analytics sentiment API; also unused.\n",
    "    headers = {\n",
    "        # Request headers\n",
    "        'Content-Type': 'application/json',\n",
    "        'Ocp-Apim-Subscription-Key': 'YOUR_KEY_HERE',  # paste your subscription key here\n",
    "    }\n",
    "\n",
    "    params = urllib.parse.urlencode({})\n",
    "\n",
    "    conn = http.client.HTTPSConnection('westus.api.cognitive.microsoft.com')\n",
    "    conn.request(\"POST\", \"/text/analytics/v2.0/sentiment?\", json_body, headers)\n",
    "    response = conn.getresponse()\n",
    "    data = response.read()\n",
    "    return data\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "with open('res/' + media_app + '/your_sents.p', 'rb') as f:\n",
    "    your_sentences = pickle.load(f)\n",
    "\n",
    "# Embed your sentences in batches of 500.\n",
    "list_embeds = []\n",
    "for i in range(0, len(your_sentences), 500):\n",
    "    print(i)\n",
    "    list_embeds.append(embed_sentence_lite(your_sentences[i:i+500]))\n",
    "\n",
    "your_embeddings = np.vstack(list_embeds)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "with open('res/' + media_app + '/your_embeddings.p', 'wb') as f:\n",
    "    pickle.dump(your_embeddings, f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0\n",
      "500\n",
      "1000\n",
      "1500\n",
      "2000\n",
      "2500\n",
      "3000\n",
      "3500\n",
      "4000\n",
      "4500\n",
      "5000\n",
      "5500\n",
      "6000\n",
      "6500\n",
      "7000\n",
      "7500\n",
      "8000\n",
      "8500\n",
      "9000\n",
      "9500\n",
      "10000\n",
      "10500\n",
      "11000\n",
      "11500\n",
      "12000\n",
      "12500\n",
      "13000\n",
      "13500\n",
      "14000\n",
      "14500\n",
      "15000\n",
      "15500\n",
      "16000\n",
      "16500\n",
      "17000\n",
      "17500\n",
      "18000\n",
      "18500\n",
      "19000\n",
      "19500\n",
      "20000\n",
      "20500\n",
      "21000\n",
      "21500\n",
      "22000\n",
      "22500\n",
      "23000\n",
      "23500\n",
      "24000\n",
      "24500\n",
      "25000\n",
      "25500\n",
      "26000\n",
      "26500\n",
      "27000\n",
      "27500\n",
      "28000\n",
      "28500\n",
      "29000\n",
      "29500\n",
      "30000\n",
      "30500\n"
     ]
    }
   ],
   "source": [
    "with open('res/' + media_app + '/other_sents.p', 'rb') as f:\n",
    "    other_sentences = pickle.load(f)\n",
    "\n",
    "# Embed the other person's sentences in batches of 500.\n",
    "list_embeds = []\n",
    "for i in range(0, len(other_sentences), 500):\n",
    "    print(i)\n",
    "    list_embeds.append(embed_sentence_lite(other_sentences[i:i+500]))\n",
    "\n",
    "other_embeddings = np.vstack(list_embeds)\n",
    "\n",
    "with open('res/' + media_app + '/other_embeddings.p', 'wb') as f:\n",
    "    pickle.dump(other_embeddings, f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Mapping from the other person's message to the reply you gave.\n",
    "with open('res/' + media_app + '/dialogues.p', 'rb') as f:\n",
    "    other_to_you = pickle.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0\n",
      "500\n",
      "1000\n",
      "1500\n",
      "2000\n",
      "2500\n",
      "3000\n",
      "3500\n",
      "4000\n",
      "4500\n",
      "5000\n",
      "5500\n",
      "6000\n",
      "6500\n",
      "7000\n",
      "7500\n",
      "8000\n",
      "8500\n",
      "9000\n",
      "9500\n",
      "10000\n",
      "10500\n",
      "11000\n"
     ]
    }
   ],
   "source": [
    "keys = list(other_to_you.keys())\n",
    "\n",
    "# Embed the other person's messages (the dialogue keys) in batches of 500.\n",
    "list_embeds = []\n",
    "for i in range(0, len(keys), 500):\n",
    "    print(i)\n",
    "    list_embeds.append(embed_sentence_lite(keys[i:i+500]))\n",
    "\n",
    "key_embeddings = np.vstack(list_embeds)\n",
    "\n",
    "with open('res/' + media_app + '/key_embeddings.p', 'wb') as f:\n",
    "    pickle.dump(key_embeddings, f)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
--------------------------------------------------------------------------------