├── 1_summary.pdf ├── .ipynb_checkpoints ├── invoice-checkpoint.pdf ├── summary-checkpoint.pdf ├── invoice2-checkpoint.pdf ├── invoice3-checkpoint.pdf ├── invoice4-checkpoint.pdf ├── invoice5-checkpoint.pdf ├── Readme-checkpoint.md └── report-checkpoint.ipynb ├── Readme.md └── report.ipynb /1_summary.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MithilShah/medical_notes_generator/HEAD/1_summary.pdf -------------------------------------------------------------------------------- /.ipynb_checkpoints/invoice-checkpoint.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MithilShah/medical_notes_generator/HEAD/.ipynb_checkpoints/invoice-checkpoint.pdf -------------------------------------------------------------------------------- /.ipynb_checkpoints/summary-checkpoint.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MithilShah/medical_notes_generator/HEAD/.ipynb_checkpoints/summary-checkpoint.pdf -------------------------------------------------------------------------------- /.ipynb_checkpoints/invoice2-checkpoint.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MithilShah/medical_notes_generator/HEAD/.ipynb_checkpoints/invoice2-checkpoint.pdf -------------------------------------------------------------------------------- /.ipynb_checkpoints/invoice3-checkpoint.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MithilShah/medical_notes_generator/HEAD/.ipynb_checkpoints/invoice3-checkpoint.pdf -------------------------------------------------------------------------------- /.ipynb_checkpoints/invoice4-checkpoint.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/MithilShah/medical_notes_generator/HEAD/.ipynb_checkpoints/invoice4-checkpoint.pdf -------------------------------------------------------------------------------- /.ipynb_checkpoints/invoice5-checkpoint.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MithilShah/medical_notes_generator/HEAD/.ipynb_checkpoints/invoice5-checkpoint.pdf -------------------------------------------------------------------------------- /Readme.md: -------------------------------------------------------------------------------- 1 | ## About the medical notes generator 2 | 3 | It is often difficult to find de-identified medical notes that can be used for product demonstration or for testing NLP algorithms. This library uses [GPT-2](https://huggingface.co/transformers/model_doc/gpt2.html) from the transformers package to generate medical text. It also randomly generates patient name, age and gender. 4 | 5 | To use the generator, run the cells in report.ipynb. A good place to run the generator is [Amazon SageMaker](https://aws.amazon.com/sagemaker/). -------------------------------------------------------------------------------- /.ipynb_checkpoints/Readme-checkpoint.md: -------------------------------------------------------------------------------- 1 | ## About the medical notes generator 2 | 3 | It is often difficult to find de-identified medical notes that can be used for product demonstration or for testing NLP algorithms. This library uses [GPT-2](https://huggingface.co/transformers/model_doc/gpt2.html) from the transformers package to generate medical text. It also randomly generates patient name, age and gender. 
4 | 5 | To use the generator, run the cells in report.ipynb. A good place to run the generator is [Amazon SageMaker](https://aws.amazon.com/sagemaker/). -------------------------------------------------------------------------------- /report.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "!pip3 install fpdf\n", 10 | "!pip3 install names\n", 11 | "!pip3 install icd10-cm\n", 12 | "!pip3 install transformers\n", 13 | "!pip3 install tensorflow==2.1" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 2, 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "no_of_reports = 10" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 3, 28 | "metadata": {}, 29 | "outputs": [ 30 | { 31 | "name": "stdout", 32 | "output_type": "stream", 33 | "text": [ 34 | "09/02/1985\n" 35 | ] 36 | } 37 | ], 38 | "source": [ 39 | "#random dates\n", 40 | "import random\n", 41 | "import time\n", 42 | "\n", 43 | "def str_time_prop(start, end, format, prop):\n", 44 | " \"\"\"Get a time at a proportion of a range of two formatted times.\n", 45 | "\n", 46 | " start and end should be strings specifying times formatted in the\n", 47 | " given format (strftime-style), giving an interval [start, end].\n", 48 | " prop specifies the proportion of the interval to be taken after\n", 49 | " start. 
The returned time will be in the specified format.\n", 50 | " \"\"\"\n", 51 | "\n", 52 | " stime = time.mktime(time.strptime(start, format))\n", 53 | " etime = time.mktime(time.strptime(end, format))\n", 54 | "\n", 55 | " ptime = stime + prop * (etime - stime)\n", 56 | "\n", 57 | " return time.strftime(format, time.localtime(ptime))\n", 58 | "\n", 59 | "\n", 60 | "def random_date(start, end, prop):\n", 61 | " return str_time_prop(start, end, '%m/%d/%Y', prop)\n", 62 | "\n", 63 | "print(random_date(\"1/1/1960\", \"1/1/2002\", random.random()))" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": 4, 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [ 72 | "import csv\n", 73 | "drugs = list(csv.reader(open('drug/Products.txt', 'r'), delimiter='\\t'))" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "import tensorflow as tf\n", 83 | "from transformers import TFGPT2LMHeadModel, GPT2Tokenizer\n", 84 | "\n", 85 | "\n", 86 | "tokenizer = GPT2Tokenizer.from_pretrained(\"gpt2\")\n", 87 | "\n", 88 | "# add the EOS token as PAD token to avoid warnings\n", 89 | "model = TFGPT2LMHeadModel.from_pretrained(\"gpt2\", pad_token_id=tokenizer.eos_token_id)" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "import os\n", 99 | "from fpdf import FPDF\n", 100 | "import random\n", 101 | "import names\n", 102 | "\n", 103 | "for j in range(1,int(no_of_reports+1)):\n", 104 | " pdf = FPDF(format = 'A4')\n", 105 | " pdf.add_page()\n", 106 | " pdf.set_font('helvetica', '', 13.0)\n", 107 | " pdf.set_xy(60,8)\n", 108 | " pdf.cell(ln=1, h=22.0, align='C', w=75.0, txt='Discharge Summary (Synthetically generated, contains meaningless text)', border=0)\n", 109 | " pdf.set_x(25)\n", 110 | " pdf.set_font('helvetica', '', 10.0)\n", 111 | " gender = \"male\" if random.random() > 0.5 else 
\"female\"\n", 112 | "\n", 113 | " pdf.cell(ln=0, h=8.0, align='L', w=75, txt='Name', border=1)\n", 114 | " pdf.cell(ln=1, h=8.0, align='L', w=75, txt=names.get_full_name(gender=gender), border=1)\n", 115 | "\n", 116 | "\n", 117 | " pdf.set_x(25)\n", 118 | " pdf.cell(ln=0, h=8.0, align='L', w=75, txt='Birth Date', border=1)\n", 119 | " pdf.cell(ln=1, h=8.0, align='L', w=75, txt=random_date(\"1/1/1960\", \"1/1/2002\", random.random()), border=1)\n", 120 | "\n", 121 | " pdf.set_x(25)\n", 122 | " pdf.cell(ln=0, h=8.0, align='L', w=75, txt='Gender', border=1)\n", 123 | " pdf.cell(ln=1, h=8.0, align='L', w=75, txt=gender, border=1)\n", 124 | "\n", 125 | " pdf.set_x(25)\n", 126 | " pdf.cell(ln=0, h=8.0, align='L', w=75, txt='Post Code', border=1)\n", 127 | " pdf.cell(ln=1, h=8.0, align='L', w=75, txt=str(int(4000*random.random())), border=1)\n", 128 | "\n", 129 | " pdf.dashed_line(20, 65, 180, 65, dash_length = 1, space_length = 1)\n", 130 | "\n", 131 | " pdf.set_xy(25,70)\n", 132 | " pdf.cell(ln=0, h=8.0, align='L', w=75, txt='Admission Date', border=1)\n", 133 | " pdf.cell(ln=1, h=8.0, align='L', w=75, txt=random_date(\"01/01/2020\", \"01/01/2020\", random.random()), border=1)\n", 134 | "\n", 135 | "\n", 136 | " pdf.set_x(25)\n", 137 | " pdf.cell(ln=0, h=8.0, align='L', w=75, txt='Discharge Date', border=1)\n", 138 | " pdf.cell(ln=1, h=8.0, align='L', w=75, txt=random_date(\"01/02/2020\", \"02/01/2020\", random.random()), border=1)\n", 139 | "\n", 140 | " pdf.dashed_line(20, 90, 180, 90, dash_length = 1, space_length = 1)\n", 141 | " pdf.set_x(60)\n", 142 | " pdf.set_font('helvetica', '', 13.0)\n", 143 | " pdf.cell(ln=1, h=13.0, align='C', w=75.0, txt='Medications', border=0)\n", 144 | "\n", 145 | " pdf.set_font('helvetica', '', 10.0)\n", 146 | "\n", 147 | " for i in range(1,5):\n", 148 | " pdf.set_x(25)\n", 149 | " pdf.cell(ln=1, h=4.0, align='L', w=120, txt=drugs[int(random.random()*41070)+1][5], border=1)\n", 150 | "\n", 151 | " pdf.set_font('helvetica', '', 
10.0)\n", 152 | "\n", 153 | " pdf.set_line_width(0.0)\n", 154 | " pdf.rect(15.0, 15.0, 170.0, 245.0)\n", 155 | "\n", 156 | "\n", 157 | " codes = list(['I30','I31','I32','I33','I34','I35','I36','I37','I38','I39',\n", 158 | " 'H05','H10','H12','H54','H34','H22','H44','H01','H24','H55',\n", 159 | " 'M05','M10','M12','M54','M34','M22','M44','M01','M24','M55'])\n", 160 | " import icd10\n", 161 | "\n", 162 | " for i in range(1,3):\n", 163 | " code = icd10.find(codes[int(random.random()*28)])\n", 164 | " if not code:\n", 165 | " continue\n", 166 | " print(code.description)\n", 167 | " input_ids = tokenizer.encode('The patient was diagnosed with ' + code.description +\" and showed symptoms of\", return_tensors='tf')\n", 168 | " sample_outputs = model.generate(input_ids,do_sample=True, max_length=100, top_k=100, top_p=0.85, num_return_sequences=1)\n", 169 | " output = tokenizer.decode(sample_outputs[0], skip_special_tokens=True).replace(\"\\r\\n\", \"\").replace('\\n\\n', '').replace('\\n', '')\n", 170 | " pdf.set_xy(25,65+(i*60))\n", 171 | " pdf.multi_cell( h=4.0, align='L', w=160, txt=output, border=0)\n", 172 | " input_ids = tokenizer.encode('The patient was given ' + drugs[int(random.random()*41070)+1][5], return_tensors='tf')\n", 173 | " sample_outputs = model.generate(input_ids,do_sample=True, max_length=100, top_k=100, top_p=0.85, num_return_sequences=1)\n", 174 | " output = tokenizer.decode(sample_outputs[0], skip_special_tokens=True).replace(\"\\r\\n\", \"\").replace('\\n\\n', '').replace('\\n', '')\n", 175 | " pdf.set_xy(25,100+(i*60))\n", 176 | " pdf.multi_cell( h=4.0, align='L', w=160, txt=output, border=0)\n", 177 | " pdf.output(f\"./{j}_summary.pdf\", 'F')\n" 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": null, 183 | "metadata": {}, 184 | "outputs": [], 185 | "source": [] 186 | } 187 | ], 188 | "metadata": { 189 | "kernelspec": { 190 | "display_name": "conda_python3", 191 | "language": "python", 192 | "name": "conda_python3" 193 | 
}, 194 | "language_info": { 195 | "codemirror_mode": { 196 | "name": "ipython", 197 | "version": 3 198 | }, 199 | "file_extension": ".py", 200 | "mimetype": "text/x-python", 201 | "name": "python", 202 | "nbconvert_exporter": "python", 203 | "pygments_lexer": "ipython3", 204 | "version": "3.6.10" 205 | } 206 | }, 207 | "nbformat": 4, 208 | "nbformat_minor": 4 209 | } 210 | -------------------------------------------------------------------------------- /.ipynb_checkpoints/report-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "!pip3 install fpdf\n", 10 | "!pip3 install names\n", 11 | "!pip3 install icd10-cm\n", 12 | "!pip3 install transformers\n", 13 | "!pip3 install tensorflow==2.1" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 7, 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "no_of_reports = 10" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 2, 28 | "metadata": {}, 29 | "outputs": [ 30 | { 31 | "name": "stdout", 32 | "output_type": "stream", 33 | "text": [ 34 | "03/24/1972\n" 35 | ] 36 | } 37 | ], 38 | "source": [ 39 | "#random dates\n", 40 | "import random\n", 41 | "import time\n", 42 | "\n", 43 | "def str_time_prop(start, end, format, prop):\n", 44 | " \"\"\"Get a time at a proportion of a range of two formatted times.\n", 45 | "\n", 46 | " start and end should be strings specifying times formatted in the\n", 47 | " given format (strftime-style), giving an interval [start, end].\n", 48 | " prop specifies the proportion of the interval to be taken after\n", 49 | " start. 
The returned time will be in the specified format.\n", 50 | " \"\"\"\n", 51 | "\n", 52 | " stime = time.mktime(time.strptime(start, format))\n", 53 | " etime = time.mktime(time.strptime(end, format))\n", 54 | "\n", 55 | " ptime = stime + prop * (etime - stime)\n", 56 | "\n", 57 | " return time.strftime(format, time.localtime(ptime))\n", 58 | "\n", 59 | "\n", 60 | "def random_date(start, end, prop):\n", 61 | " return str_time_prop(start, end, '%m/%d/%Y', prop)\n", 62 | "\n", 63 | "print(random_date(\"1/1/1960\", \"1/1/2002\", random.random()))" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": 3, 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [ 72 | "import csv\n", 73 | "drugs = list(csv.reader(open('drug/Products.txt', 'r'), delimiter='\\t'))" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": 4, 79 | "metadata": {}, 80 | "outputs": [ 81 | { 82 | "data": { 83 | "application/vnd.jupyter.widget-view+json": { 84 | "model_id": "4b348cf4465242538ad0c5e2188c6556", 85 | "version_major": 2, 86 | "version_minor": 0 87 | }, 88 | "text/plain": [ 89 | "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1042301.0, style=ProgressStyle(descript…" 90 | ] 91 | }, 92 | "metadata": {}, 93 | "output_type": "display_data" 94 | }, 95 | { 96 | "name": "stdout", 97 | "output_type": "stream", 98 | "text": [ 99 | "\n" 100 | ] 101 | }, 102 | { 103 | "data": { 104 | "application/vnd.jupyter.widget-view+json": { 105 | "model_id": "6df689b618b34e4785cb178774ae670c", 106 | "version_major": 2, 107 | "version_minor": 0 108 | }, 109 | "text/plain": [ 110 | "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=456318.0, style=ProgressStyle(descripti…" 111 | ] 112 | }, 113 | "metadata": {}, 114 | "output_type": "display_data" 115 | }, 116 | { 117 | "name": "stdout", 118 | "output_type": "stream", 119 | "text": [ 120 | "\n" 121 | ] 122 | }, 123 | { 124 | "data": { 125 | "application/vnd.jupyter.widget-view+json": 
{ 126 | "model_id": "c837c2a6994b4fcc8d1d2663cc73a375", 127 | "version_major": 2, 128 | "version_minor": 0 129 | }, 130 | "text/plain": [ 131 | "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=665.0, style=ProgressStyle(description_…" 132 | ] 133 | }, 134 | "metadata": {}, 135 | "output_type": "display_data" 136 | }, 137 | { 138 | "name": "stdout", 139 | "output_type": "stream", 140 | "text": [ 141 | "\n" 142 | ] 143 | }, 144 | { 145 | "data": { 146 | "application/vnd.jupyter.widget-view+json": { 147 | "model_id": "234ef9a023eb41e6855cdd52ec9668a0", 148 | "version_major": 2, 149 | "version_minor": 0 150 | }, 151 | "text/plain": [ 152 | "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=497933648.0, style=ProgressStyle(descri…" 153 | ] 154 | }, 155 | "metadata": {}, 156 | "output_type": "display_data" 157 | }, 158 | { 159 | "name": "stdout", 160 | "output_type": "stream", 161 | "text": [ 162 | "\n" 163 | ] 164 | }, 165 | { 166 | "name": "stderr", 167 | "output_type": "stream", 168 | "text": [ 169 | "All model checkpoint weights were used when initializing TFGPT2LMHeadModel.\n", 170 | "\n", 171 | "All the weights of TFGPT2LMHeadModel were initialized from the model checkpoint at gpt2.\n", 172 | "If your task is similar to the task the model of the ckeckpoint was trained on, you can already use TFGPT2LMHeadModel for predictions without further training.\n" 173 | ] 174 | } 175 | ], 176 | "source": [ 177 | "import tensorflow as tf\n", 178 | "from transformers import TFGPT2LMHeadModel, GPT2Tokenizer\n", 179 | "\n", 180 | "\n", 181 | "tokenizer = GPT2Tokenizer.from_pretrained(\"gpt2\")\n", 182 | "\n", 183 | "# add the EOS token as PAD token to avoid warnings\n", 184 | "model = TFGPT2LMHeadModel.from_pretrained(\"gpt2\", pad_token_id=tokenizer.eos_token_id)" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": 10, 190 | "metadata": {}, 191 | "outputs": [ 192 | { 193 | "name": "stdout", 194 | "output_type": 
"stream", 195 | "text": [ 196 | "Conjunctivitis\n", 197 | "Disorders of globe\n", 198 | "Acute pericarditis\n", 199 | "Other diseases of pericardium\n" 200 | ] 201 | } 202 | ], 203 | "source": [ 204 | "import os\n", 205 | "from fpdf import FPDF\n", 206 | "import random\n", 207 | "import names\n", 208 | "\n", 209 | "for j in range(1,int(no_of_reports+1)):\n", 210 | " pdf = FPDF(format = 'A4')\n", 211 | " pdf.add_page()\n", 212 | " pdf.set_font('helvetica', '', 13.0)\n", 213 | " pdf.set_xy(60,8)\n", 214 | " pdf.cell(ln=1, h=22.0, align='C', w=75.0, txt='Discharge Summary (Synthetically generated, contains meaningless text)', border=0)\n", 215 | " pdf.set_x(25)\n", 216 | " pdf.set_font('helvetica', '', 10.0)\n", 217 | " gender = \"male\" if random.random() > 0.5 else \"female\"\n", 218 | "\n", 219 | " pdf.cell(ln=0, h=8.0, align='L', w=75, txt='Name', border=1)\n", 220 | " pdf.cell(ln=1, h=8.0, align='L', w=75, txt=names.get_full_name(gender=gender), border=1)\n", 221 | "\n", 222 | "\n", 223 | " pdf.set_x(25)\n", 224 | " pdf.cell(ln=0, h=8.0, align='L', w=75, txt='Birth Date', border=1)\n", 225 | " pdf.cell(ln=1, h=8.0, align='L', w=75, txt=random_date(\"1/1/1960\", \"1/1/2002\", random.random()), border=1)\n", 226 | "\n", 227 | " pdf.set_x(25)\n", 228 | " pdf.cell(ln=0, h=8.0, align='L', w=75, txt='Gender', border=1)\n", 229 | " pdf.cell(ln=1, h=8.0, align='L', w=75, txt=gender, border=1)\n", 230 | "\n", 231 | " pdf.set_x(25)\n", 232 | " pdf.cell(ln=0, h=8.0, align='L', w=75, txt='Post Code', border=1)\n", 233 | " pdf.cell(ln=1, h=8.0, align='L', w=75, txt=str(int(4000*random.random())), border=1)\n", 234 | "\n", 235 | " pdf.dashed_line(20, 65, 180, 65, dash_length = 1, space_length = 1)\n", 236 | "\n", 237 | " pdf.set_xy(25,70)\n", 238 | " pdf.cell(ln=0, h=8.0, align='L', w=75, txt='Admission Date', border=1)\n", 239 | " pdf.cell(ln=1, h=8.0, align='L', w=75, txt=random_date(\"01/01/2020\", \"01/01/2020\", random.random()), border=1)\n", 240 | "\n", 241 | "\n", 242 
| " pdf.set_x(25)\n", 243 | " pdf.cell(ln=0, h=8.0, align='L', w=75, txt='Discharge Date', border=1)\n", 244 | " pdf.cell(ln=1, h=8.0, align='L', w=75, txt=random_date(\"01/02/2020\", \"02/01/2020\", random.random()), border=1)\n", 245 | "\n", 246 | " pdf.dashed_line(20, 90, 180, 90, dash_length = 1, space_length = 1)\n", 247 | " pdf.set_x(60)\n", 248 | " pdf.set_font('helvetica', '', 13.0)\n", 249 | " pdf.cell(ln=1, h=13.0, align='C', w=75.0, txt='Medications', border=0)\n", 250 | "\n", 251 | " pdf.set_font('helvetica', '', 10.0)\n", 252 | "\n", 253 | " for i in range(1,5):\n", 254 | " pdf.set_x(25)\n", 255 | " pdf.cell(ln=1, h=4.0, align='L', w=120, txt=drugs[int(random.random()*41070)+1][5], border=1)\n", 256 | "\n", 257 | " pdf.set_font('helvetica', '', 10.0)\n", 258 | "\n", 259 | " pdf.set_line_width(0.0)\n", 260 | " pdf.rect(15.0, 15.0, 170.0, 245.0)\n", 261 | "\n", 262 | "\n", 263 | " codes = list(['I30','I31','I32','I33','I34','I35','I36','I37','I38','I39',\n", 264 | " 'H05','H10','H12','H54','H34','H22','H44','H01','H24','H55',\n", 265 | " 'M05','M10','M12','M54','M34','M22','M44','M01','M24','M55'])\n", 266 | " import icd10\n", 267 | "\n", 268 | " for i in range(1,3):\n", 269 | " code = icd10.find(codes[int(random.random()*28)])\n", 270 | " if not code:\n", 271 | " continue\n", 272 | " print(code.description)\n", 273 | " input_ids = tokenizer.encode('The patient was diagnosed with ' + code.description +\" and showed symptoms of\", return_tensors='tf')\n", 274 | " sample_outputs = model.generate(input_ids,do_sample=True, max_length=100, top_k=100, top_p=0.85, num_return_sequences=1)\n", 275 | " output = tokenizer.decode(sample_outputs[0], skip_special_tokens=True).replace(\"\\r\\n\", \"\").replace('\\n\\n', '').replace('\\n', '')\n", 276 | " pdf.set_xy(25,65+(i*60))\n", 277 | " pdf.multi_cell( h=4.0, align='L', w=160, txt=output, border=0)\n", 278 | " input_ids = tokenizer.encode('The patient was given ' + drugs[int(random.random()*41070)+1][5], 
return_tensors='tf')\n", 279 | " sample_outputs = model.generate(input_ids,do_sample=True, max_length=100, top_k=100, top_p=0.85, num_return_sequences=1)\n", 280 | " output = tokenizer.decode(sample_outputs[0], skip_special_tokens=True).replace(\"\\r\\n\", \"\").replace('\\n\\n', '').replace('\\n', '')\n", 281 | " pdf.set_xy(25,100+(i*60))\n", 282 | " pdf.multi_cell( h=4.0, align='L', w=160, txt=output, border=0)\n", 283 | " pdf.output(f\"./{j}_summary.pdf\", 'F')\n" 284 | ] 285 | }, 286 | { 287 | "cell_type": "code", 288 | "execution_count": null, 289 | "metadata": {}, 290 | "outputs": [], 291 | "source": [] 292 | } 293 | ], 294 | "metadata": { 295 | "kernelspec": { 296 | "display_name": "conda_python3", 297 | "language": "python", 298 | "name": "conda_python3" 299 | }, 300 | "language_info": { 301 | "codemirror_mode": { 302 | "name": "ipython", 303 | "version": 3 304 | }, 305 | "file_extension": ".py", 306 | "mimetype": "text/x-python", 307 | "name": "python", 308 | "nbconvert_exporter": "python", 309 | "pygments_lexer": "ipython3", 310 | "version": "3.6.10" 311 | } 312 | }, 313 | "nbformat": 4, 314 | "nbformat_minor": 4 315 | } 316 | --------------------------------------------------------------------------------