├── Me_Bot.ipynb
├── README.md
├── clean_wechat_chats.py
├── clean_whatsapp_chats.py
├── pictures
│   └── MM.sqlite.png
└── prepare_files.ipynb

/Me_Bot.ipynb:
--------------------------------------------------------------------------------
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:Using /tmp/tfhub_modules to cache modules.\n",
      "SentencePiece model loaded at b'/tmp/tfhub_modules/539544f0a997d91c327c23285ea00c37588d92cc/assets/universal_encoder_8k_spm.model'.\n"
     ]
    }
   ],
   "source": [
    "import sys\n",
    "sys.path.append('/usr/local/lib/python3.5/dist-packages/')\n",
    "import tensorflow as tf\n",
    "import tensorflow_hub as hub\n",
    "import numpy as np\n",
    "import os\n",
    "import http.client, urllib.request, urllib.parse, urllib.error, base64\n",
    "import json\n",
    "import warnings\n",
    "warnings.filterwarnings(\"ignore\")\n",
    "import pickle\n",
    "import sentencepiece as spm\n",
    "\n",
    "media_app = 'whatsapp'  # change to 'wechat' if you cleaned WeChat chats\n",
    "\n",
    "# Load the Universal Sentence Encoder Lite module and build the encoding graph.\n",
    "module_url = \"https://tfhub.dev/google/universal-sentence-encoder-lite/2\"\n",
    "module = hub.Module(module_url)\n",
    "tf.logging.set_verbosity(tf.logging.WARN)\n",
    "\n",
    "input_placeholder = tf.sparse_placeholder(tf.int64, shape=[None, None])\n",
    "encodings = module(\n",
    "    inputs=dict(\n",
    "        values=input_placeholder.values,\n",
    "        indices=input_placeholder.indices,\n",
    "        dense_shape=input_placeholder.dense_shape))\n",
    "\n",
    "with tf.Session() as sess:\n",
    "    spm_path = sess.run(module(signature=\"spm_path\"))\n",
    "\n",
    "sp = spm.SentencePieceProcessor()\n",
    "sp.Load(spm_path)\n",
    "print(\"SentencePiece model loaded at {}.\".format(spm_path))\n",
    "\n",
    "def process_to_IDs_in_sparse_format(sp, sentences):\n",
    "    # A utility method that processes sentences with the SentencePiece processor\n",
    "    # 'sp' and returns the results in tf.SparseTensor-like format:\n",
    "    # (values, indices, dense_shape)\n",
    "    ids = [sp.EncodeAsIds(x) for x in sentences]\n",
    "    max_len = max(len(x) for x in ids)\n",
    "    dense_shape = (len(ids), max_len)\n",
    "    values = [item for sublist in ids for item in sublist]\n",
    "    indices = [[row, col] for row in range(len(ids)) for col in range(len(ids[row]))]\n",
    "    return (values, indices, dense_shape)\n",
    "\n",
    "def embed_sentence_lite(sentences):\n",
    "    # Embed a list of sentences with the Universal Sentence Encoder Lite.\n",
    "    values, indices, dense_shape = process_to_IDs_in_sparse_format(sp, sentences)\n",
    "\n",
    "    # Reduce logging output.\n",
    "    tf.logging.set_verbosity(tf.logging.ERROR)\n",
    "\n",
    "    with tf.Session() as session:\n",
    "        session.run([tf.global_variables_initializer(), tf.tables_initializer()])\n",
    "        message_embeddings = session.run(\n",
    "            encodings,\n",
    "            feed_dict={input_placeholder.values: values,\n",
    "                       input_placeholder.indices: indices,\n",
    "                       input_placeholder.dense_shape: dense_shape})\n",
    "\n",
    "    return message_embeddings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def find_closest(sentence_rep, query_rep, K):\n",
    "    # Indices of the K rows of sentence_rep closest to query_rep in Euclidean distance.\n",
    "    top_K = np.argsort(np.sqrt(np.sum(np.square(sentence_rep - query_rep), axis=1)))[:K]\n",
    "    return top_K"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "with open('res/' + media_app + '/other_embeddings.p', 'rb') as f:\n",
    "    other_embeddings = pickle.load(f)\n",
    "\n",
    "with open('res/' + media_app + '/your_embeddings.p', 'rb') as f:\n",
    "    your_embeddings = pickle.load(f)\n",
    "\n",
    "# Mapping from the other person's message to the reply you gave,\n",
    "# built by the cleaning scripts.\n",
    "with open('res/' + media_app + '/dialogues.p', 'rb') as f:\n",
    "    pr_to_sp = pickle.load(f)\n",
    "\n",
    "with open('res/' + media_app + '/your_sents.p', 'rb') as f:\n",
    "    your_sentences = pickle.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "keys = list(pr_to_sp.keys())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "with open('res/' + media_app + '/key_embeddings.p', 'rb') as f:\n",
    "    key_embeddings = pickle.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def speak_like_me(query, K, your_embeddings, your_sentences):\n",
    "    # Print the K sentences you have written that are closest to the query.\n",
    "    query_embedding = embed_sentence_lite([query])\n",
    "    closest_your = find_closest(your_embeddings, query_embedding, K)\n",
    "    for cl in closest_your:\n",
    "        print(your_sentences[cl])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def respond_like_me(query, K, key_embeddings, keys):\n",
    "    # Find the other person's K + 2 messages closest to the query, skip the\n",
    "    # two closest (they tend to be near-duplicates of the query itself), and\n",
    "    # print the replies you gave to the rest.\n",
    "    query_embedding = embed_sentence_lite([query])\n",
    "    closest_other = find_closest(key_embeddings, query_embedding, K + 2)\n",
    "    for k in closest_other[2:]:\n",
    "        print(pr_to_sp[keys[k]])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Work time now\n",
      "\n",
      "Potty :P\n",
      "\n",
      "Probably the first time you'll hear me say jt\n",
      "\n"
     ]
    }
   ],
   "source": [
    "respond_like_me(\"What's up?\", 4, key_embeddings, keys)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "So so hungry\n",
      "\n",
      "Reeeaaaallly hungry\n",
      "\n",
      "I am in the mood to eat\n",
      "\n",
      "I want to eat that so badly. 😣\n",
      "\n",
      "I want that food\n",
      "\n"
     ]
    }
   ],
   "source": [
    "speak_like_me(\"I am so hungry\", 5, your_embeddings, your_sentences)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Me_Bot
A simple tool to make a bot that speaks like you, by simply learning from your WhatsApp chats.

Instructions:

1. From WhatsApp on your phone, open any chat and export it from the chat settings. Move the txt file you receive into the Me_Bot folder.

2. Set YOUR_NAME and OTHER_NAME in clean_whatsapp_chats.py to the names as they appear in your exported txt file, then run the script with:

`python clean_whatsapp_chats.py whatsapp_chat.txt`
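The script splits each line on the `' NAME: '` separator, so it assumes the export looks roughly like the snippet below. The timestamp format varies with phone locale and WhatsApp version, so treat this as an illustration rather than the exact format:

```
[25/06/18, 9:41:07 PM] YOUR_NAME: Want to grab dinner?
[25/06/18, 9:42:13 PM] OTHER_NAME: Sure, where?
```

Lines containing 'Missed Voice Call' or 'image omitted' are skipped, and lines without a `' NAME: '` separator are appended to the previous message as continuations.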
3. Run the prepare_files.ipynb notebook.

4. Run the Me_Bot.ipynb notebook and play with the bot at the bottom!

NOTE - Actively seeking collaborators for fun side projects like this. If you're interested, please drop me a mail at smadan@mit.edu

## For WeChat users:
WeChat chat history is saved in an SQLite database, so you need to export it from your phone.
If you have an iPhone, these are the steps to get the database file:
1. Use iTunes to back up your phone (leave "encrypt backup" unselected)
2. Use iTools to open the backup and copy out the database file named MM.sqlite
3. Run `python clean_wechat_chats.py YOUR_DATABASE_PATH YOUR_FRIEND_ID`

You can find more detailed information about this [here](https://www.cnblogs.com/cxun/p/5677606.html)

![MM.sqlite](https://github.com/DH-Diego/Me_Bot/blob/master/pictures/MM.sqlite.png)

This is the database. Each table whose name starts with 'Chat_' holds the chat history with one friend; find the id of the table you want to build the bot from and pass it to the script above. Currently only English chat is supported; a Chinese version is under construction.
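## How it works

There is no generative model here: every sentence from your chats is embedded with the Universal Sentence Encoder Lite, and the bot answers a query by nearest-neighbour search over those embeddings. `speak_like_me` returns your own sentences closest to the query, while `respond_like_me` finds the other person's messages closest to the query and prints the replies you gave to them, using the dialogue pairs saved by the cleaning scripts. Below is a minimal sketch of the retrieval step, with tiny made-up vectors standing in for the real 512-dimensional embeddings:

```python
import numpy as np

def find_closest(sentence_rep, query_rep, K):
    # Rank stored embeddings by Euclidean distance to the query; return the K best.
    return np.argsort(np.sqrt(np.sum(np.square(sentence_rep - query_rep), axis=1)))[:K]

# Toy stand-ins: four stored "sentences" embedded in three dimensions.
sentence_rep = np.array([[0.9, 0.1, 0.0],
                         [0.0, 1.0, 0.2],
                         [0.8, 0.2, 0.1],
                         [0.1, 0.0, 0.9]])
query_rep = np.array([[1.0, 0.0, 0.0]])

print(find_closest(sentence_rep, query_rep, K=2))  # -> [0 2], the two closest rows
```

This is the same `find_closest` used in both notebooks; only the embeddings differ.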
--------------------------------------------------------------------------------
/clean_wechat_chats.py:
--------------------------------------------------------------------------------
import os
import pickle
import sqlite3
import sys

chat_db_file = sys.argv[1]
friend_id = sys.argv[2]
conn = sqlite3.connect(chat_db_file)
cursor = conn.cursor()
cursor.execute('select CreateTime, Message, Status, Type from ' + friend_id)
content = cursor.fetchall()
all_text = []
your_sents = []
other_sents = []

prev_pr_to_sp = {}
prev = None
for line in content:
    createTime = line[0]
    message = line[1]  # message content
    status = line[2]   # status 2 or 3: message sent by you (from phone or computer); 4 or 5: message from the other person
    type_ = line[3]    # type 1: text message; type 47: emoji (not sure); type 10000: link (not sure)

    if type_ != 1:
        continue
    if status == 2 or status == 3:
        your_sents.append(message)
        all_text.append(message)
        # If the other person spoke last, record this message as your reply to theirs.
        if prev == 'pr':
            prev_pr_to_sp[other_sents[-1]] = message
        prev = 'sp'
    elif status == 4 or status == 5:
        other_sents.append(message)
        all_text.append(message)
        prev = 'pr'
    else:
        # Unknown status: treat the row as a continuation of the previous message.
        print(line)
        all_text[-1] += message
        if prev == 'sp':
            your_sents[-1] += message
        elif prev == 'pr':
            other_sents[-1] += message

os.makedirs('res/wechat', exist_ok=True)

with open('res/wechat/dialogues.p', 'wb') as f:
    pickle.dump(prev_pr_to_sp, f)

with open('res/wechat/all_text.p', 'wb') as f:
    pickle.dump(all_text, f)

with open('res/wechat/your_sents.p', 'wb') as f:
    pickle.dump(your_sents, f)

with open('res/wechat/other_sents.p', 'wb') as f:
    pickle.dump(other_sents, f)
--------------------------------------------------------------------------------
/clean_whatsapp_chats.py:
--------------------------------------------------------------------------------
import os
import pickle
import sys

chat_file = sys.argv[1]

with open(chat_file, 'r', encoding="utf8") as f:
    content = f.readlines()
all_text = []
your_sents = []
other_sents = []

YOUR_NAME = 'YOUR NAME HERE'
OTHER_NAME = 'OTHER NAME HERE'

prev_pr_to_sp = {}
prev = None
for line in content[1:]:
    if 'Missed Voice Call' in line:
        continue
    if 'image omitted' in line:
        continue
    if ' %s: ' % YOUR_NAME in line:
        text = line.split(' %s: ' % YOUR_NAME)[-1]
        your_sents.append(text)
        all_text.append(text)
        # If the other person spoke last, record this message as your reply to theirs.
        if prev == 'pr':
            prev_pr_to_sp[other_sents[-1]] = text
        prev = 'sp'
    elif ' %s: ' % OTHER_NAME in line:
        text = line.split(' %s: ' % OTHER_NAME)[-1]
        other_sents.append(text)
        all_text.append(text)
        prev = 'pr'
    else:
        # No name marker: treat the line as a continuation of the previous message.
        print(line)
        all_text[-1] += line
        if prev == 'sp':
            your_sents[-1] += line
        elif prev == 'pr':
            other_sents[-1] += line

os.makedirs('res/whatsapp', exist_ok=True)

with open('res/whatsapp/dialogues.p', 'wb') as f:
    pickle.dump(prev_pr_to_sp, f)

with open('res/whatsapp/all_text.p', 'wb') as f:
    pickle.dump(all_text, f)

with open('res/whatsapp/your_sents.p', 'wb') as f:
    pickle.dump(your_sents, f)

with open('res/whatsapp/other_sents.p', 'wb') as f:
    pickle.dump(other_sents, f)
--------------------------------------------------------------------------------
/pictures/MM.sqlite.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Spandan-Madan/Me_Bot/56d60c6925d8aaadcc8122db1dbc6cfdd362389a/pictures/MM.sqlite.png
--------------------------------------------------------------------------------
/prepare_files.ipynb:
--------------------------------------------------------------------------------
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:Using /tmp/tfhub_modules to cache modules.\n",
      "SentencePiece model loaded at b'/tmp/tfhub_modules/539544f0a997d91c327c23285ea00c37588d92cc/assets/universal_encoder_8k_spm.model'.\n"
     ]
    }
   ],
   "source": [
    "import sys\n",
    "sys.path.append('/usr/local/lib/python3.5/dist-packages/')\n",
    "import tensorflow as tf\n",
    "import tensorflow_hub as hub\n",
    "import numpy as np\n",
    "import os\n",
    "import http.client, urllib.request, urllib.parse, urllib.error, base64\n",
    "import json\n",
    "import warnings\n",
    "warnings.filterwarnings(\"ignore\")\n",
    "import pickle\n",
    "import sentencepiece as spm\n",
    "\n",
    "media_app = 'whatsapp'  # change to 'wechat' if you cleaned WeChat chats\n",
    "module_url = \"https://tfhub.dev/google/universal-sentence-encoder-lite/2\"\n",
    "embed = hub.Module(module_url)\n",
    "tf.logging.set_verbosity(tf.logging.WARN)\n",
    "\n",
    "module = hub.Module(module_url)\n",
    "input_placeholder = tf.sparse_placeholder(tf.int64, shape=[None, None])\n",
    "encodings = module(\n",
    "    inputs=dict(\n",
    "        values=input_placeholder.values,\n",
    "        indices=input_placeholder.indices,\n",
    "        dense_shape=input_placeholder.dense_shape))\n",
    "\n",
    "with tf.Session() as sess:\n",
    "    spm_path = sess.run(module(signature=\"spm_path\"))\n",
    "\n",
    "sp = spm.SentencePieceProcessor()\n",
    "sp.Load(spm_path)\n",
    "print(\"SentencePiece model loaded at {}.\".format(spm_path))\n",
    "\n",
    "def process_to_IDs_in_sparse_format(sp, sentences):\n",
    "    # A utility method that processes sentences with the SentencePiece processor\n",
    "    # 'sp' and returns the results in tf.SparseTensor-like format:\n",
    "    # (values, indices, dense_shape)\n",
    "    ids = [sp.EncodeAsIds(x) for x in sentences]\n",
    "    max_len = max(len(x) for x in ids)\n",
    "    dense_shape = (len(ids), max_len)\n",
    "    values = [item for sublist in ids for item in sublist]\n",
    "    indices = [[row, col] for row in range(len(ids)) for col in range(len(ids[row]))]\n",
    "    return (values, indices, dense_shape)\n",
    "\n",
    "def embed_sentence_lite(sentences):\n",
    "    # Embed a list of sentences with the Universal Sentence Encoder Lite.\n",
    "    values, indices, dense_shape = process_to_IDs_in_sparse_format(sp, sentences)\n",
    "\n",
    "    # Reduce logging output.\n",
    "    tf.logging.set_verbosity(tf.logging.ERROR)\n",
    "\n",
    "    with tf.Session() as session:\n",
    "        session.run([tf.global_variables_initializer(), tf.tables_initializer()])\n",
    "        message_embeddings = session.run(\n",
    "            encodings,\n",
    "            feed_dict={input_placeholder.values: values,\n",
    "                       input_placeholder.indices: indices,\n",
    "                       input_placeholder.dense_shape: dense_shape})\n",
    "\n",
    "    return message_embeddings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def find_closest(sentence_rep, query_rep, K):\n",
    "    # Indices of the K rows of sentence_rep closest to query_rep in Euclidean distance.\n",
    "    top_K = np.argsort(np.sqrt(np.sum(np.square(sentence_rep - query_rep), axis=1)))[:K]\n",
    "    return top_K"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def embed_sentences(sentences):\n",
    "    # Note: this helper expects an encoder that takes raw strings (the full,\n",
    "    # non-lite Universal Sentence Encoder); it is unused in this notebook.\n",
    "    with tf.Session() as session:\n",
    "        session.run([tf.global_variables_initializer(), tf.tables_initializer()])\n",
    "        message_embeddings = session.run(embed(sentences))\n",
    "\n",
    "    return message_embeddings\n",
    "\n",
    "def get_sentiments(json_body):\n",
    "    # Optional helper for the Microsoft Text Analytics sentiment API; also unused.\n",
    "    headers = {\n",
    "        # Request headers\n",
    "        'Content-Type': 'application/json',\n",
    "        'Ocp-Apim-Subscription-Key': 'YOUR_KEY_HERE',  # paste your subscription key here\n",
    "    }\n",
    "\n",
    "    params = urllib.parse.urlencode({})\n",
    "\n",
    "    conn = http.client.HTTPSConnection('westus.api.cognitive.microsoft.com')\n",
    "    conn.request(\"POST\", \"/text/analytics/v2.0/sentiment?\", json_body, headers)\n",
    "    response = conn.getresponse()\n",
    "    data = response.read()\n",
    "    return data\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "with open('res/' + media_app + '/your_sents.p', 'rb') as f:\n",
    "    your_sentences = pickle.load(f)\n",
    "\n",
    "# Embed your sentences in batches of 500.\n",
    "list_embeds = []\n",
    "for i in range(0, len(your_sentences), 500):\n",
    "    print(i)\n",
    "    list_embeds.append(embed_sentence_lite(your_sentences[i:i+500]))\n",
    "\n",
    "your_embeddings = np.vstack(list_embeds)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "with open('res/' + media_app + '/your_embeddings.p', 'wb') as f:\n",
    "    pickle.dump(your_embeddings, f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0\n",
      "500\n",
      "1000\n",
      "1500\n",
      "2000\n",
      "2500\n",
      "3000\n",
      "3500\n",
      "4000\n",
      "4500\n",
      "5000\n",
      "5500\n",
      "6000\n",
      "6500\n",
      "7000\n",
      "7500\n",
      "8000\n",
      "8500\n",
      "9000\n",
      "9500\n",
      "10000\n",
      "10500\n",
      "11000\n",
      "11500\n",
      "12000\n",
      "12500\n",
      "13000\n",
      "13500\n",
      "14000\n",
      "14500\n",
      "15000\n",
      "15500\n",
      "16000\n",
      "16500\n",
      "17000\n",
      "17500\n",
      "18000\n",
      "18500\n",
      "19000\n",
      "19500\n",
      "20000\n",
      "20500\n",
      "21000\n",
      "21500\n",
      "22000\n",
      "22500\n",
      "23000\n",
      "23500\n",
      "24000\n",
      "24500\n",
      "25000\n",
      "25500\n",
      "26000\n",
      "26500\n",
      "27000\n",
      "27500\n",
      "28000\n",
      "28500\n",
      "29000\n",
      "29500\n",
      "30000\n",
      "30500\n"
     ]
    }
   ],
   "source": [
    "with open('res/' + media_app + '/other_sents.p', 'rb') as f:\n",
    "    other_sentences = pickle.load(f)\n",
    "\n",
    "# Embed the other person's sentences in batches of 500.\n",
    "list_embeds = []\n",
    "for i in range(0, len(other_sentences), 500):\n",
    "    print(i)\n",
    "    list_embeds.append(embed_sentence_lite(other_sentences[i:i+500]))\n",
    "\n",
    "other_embeddings = np.vstack(list_embeds)\n",
    "\n",
    "with open('res/' + media_app + '/other_embeddings.p', 'wb') as f:\n",
    "    pickle.dump(other_embeddings, f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Mapping from the other person's message to the reply you gave.\n",
    "with open('res/' + media_app + '/dialogues.p', 'rb') as f:\n",
    "    other_to_you = pickle.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0\n",
      "500\n",
      "1000\n",
      "1500\n",
      "2000\n",
      "2500\n",
      "3000\n",
      "3500\n",
      "4000\n",
      "4500\n",
      "5000\n",
      "5500\n",
      "6000\n",
      "6500\n",
      "7000\n",
      "7500\n",
      "8000\n",
      "8500\n",
      "9000\n",
      "9500\n",
      "10000\n",
      "10500\n",
      "11000\n"
     ]
    }
   ],
   "source": [
    "keys = list(other_to_you.keys())\n",
    "\n",
    "# Embed the other person's messages (the dialogue keys) in batches of 500.\n",
    "list_embeds = []\n",
    "for i in range(0, len(keys), 500):\n",
    "    print(i)\n",
    "    list_embeds.append(embed_sentence_lite(keys[i:i+500]))\n",
    "\n",
    "key_embeddings = np.vstack(list_embeds)\n",
    "\n",
    "with open('res/' + media_app + '/key_embeddings.p', 'wb') as f:\n",
    "    pickle.dump(key_embeddings, f)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
--------------------------------------------------------------------------------