├── 1_summary.pdf
├── .ipynb_checkpoints
    ├── invoice-checkpoint.pdf
    ├── summary-checkpoint.pdf
    ├── invoice2-checkpoint.pdf
    ├── invoice3-checkpoint.pdf
    ├── invoice4-checkpoint.pdf
    ├── invoice5-checkpoint.pdf
    ├── Readme-checkpoint.md
    └── report-checkpoint.ipynb
├── Readme.md
└── report.ipynb


/1_summary.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MithilShah/medical_notes_generator/HEAD/1_summary.pdf


--------------------------------------------------------------------------------
/.ipynb_checkpoints/invoice-checkpoint.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MithilShah/medical_notes_generator/HEAD/.ipynb_checkpoints/invoice-checkpoint.pdf


--------------------------------------------------------------------------------
/.ipynb_checkpoints/summary-checkpoint.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MithilShah/medical_notes_generator/HEAD/.ipynb_checkpoints/summary-checkpoint.pdf


--------------------------------------------------------------------------------
/.ipynb_checkpoints/invoice2-checkpoint.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MithilShah/medical_notes_generator/HEAD/.ipynb_checkpoints/invoice2-checkpoint.pdf


--------------------------------------------------------------------------------
/.ipynb_checkpoints/invoice3-checkpoint.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MithilShah/medical_notes_generator/HEAD/.ipynb_checkpoints/invoice3-checkpoint.pdf


--------------------------------------------------------------------------------
/.ipynb_checkpoints/invoice4-checkpoint.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MithilShah/medical_notes_generator/HEAD/.ipynb_checkpoints/invoice4-checkpoint.pdf


--------------------------------------------------------------------------------
/.ipynb_checkpoints/invoice5-checkpoint.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MithilShah/medical_notes_generator/HEAD/.ipynb_checkpoints/invoice5-checkpoint.pdf


--------------------------------------------------------------------------------
/Readme.md:
--------------------------------------------------------------------------------
1 | ## About the medical notes generator
2 | 
3 | It is often difficult to find de-identified medical notes that can be used for product demonstration or for testing NLP algorithms. This library uses [GPT-2](https://huggingface.co/transformers/model_doc/gpt2.html) from the transformers package to generate medical text. It also randomly generates patient name, age and gender.
4 | 
5 | To use the generator, run the cells in report.ipynb. A good place to run the generator is [Amazon SageMaker](https://aws.amazon.com/sagemaker/).


--------------------------------------------------------------------------------
/.ipynb_checkpoints/Readme-checkpoint.md:
--------------------------------------------------------------------------------
1 | ## About the medical notes generator
2 | 
3 | It is often difficult to find de-identified medical notes that can be used for product demonstration or for testing NLP algorithms. This library uses [GPT-2](https://huggingface.co/transformers/model_doc/gpt2.html) from the transformers package to generate medical text. It also randomly generates patient name, age and gender.
4 | 
5 | To use the generator, run the cells in report.ipynb. A good place to run the generator is [Amazon SageMaker](https://aws.amazon.com/sagemaker/).


--------------------------------------------------------------------------------
/report.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "!pip3 install fpdf\n",
 10 |     "!pip3 install names\n",
 11 |     "!pip3 install icd10-cm\n",
 12 |     "!pip3 install transformers\n",
 13 |     "!pip3 install tensorflow==2.1"
 14 |    ]
 15 |   },
 16 |   {
 17 |    "cell_type": "code",
 18 |    "execution_count": 2,
 19 |    "metadata": {},
 20 |    "outputs": [],
 21 |    "source": [
 22 |     "no_of_reports = 10"
 23 |    ]
 24 |   },
 25 |   {
 26 |    "cell_type": "code",
 27 |    "execution_count": 3,
 28 |    "metadata": {},
 29 |    "outputs": [
 30 |     {
 31 |      "name": "stdout",
 32 |      "output_type": "stream",
 33 |      "text": [
 34 |       "09/02/1985\n"
 35 |      ]
 36 |     }
 37 |    ],
 38 |    "source": [
 39 |     "#random dates\n",
 40 |     "import random\n",
 41 |     "import time\n",
 42 |     "\n",
 43 |     "def str_time_prop(start, end, format, prop):\n",
 44 |     "    \"\"\"Get a time at a proportion of a range of two formatted times.\n",
 45 |     "\n",
 46 |     "    start and end should be strings specifying times formatted in the\n",
 47 |     "    given format (strftime-style), giving an interval [start, end].\n",
 48 |     "    prop specifies the proportion of the interval to be taken after\n",
 49 |     "    start.  The returned time will be in the specified format.\n",
 50 |     "    \"\"\"\n",
 51 |     "\n",
 52 |     "    stime = time.mktime(time.strptime(start, format))\n",
 53 |     "    etime = time.mktime(time.strptime(end, format))\n",
 54 |     "\n",
 55 |     "    ptime = stime + prop * (etime - stime)\n",
 56 |     "\n",
 57 |     "    return time.strftime(format, time.localtime(ptime))\n",
 58 |     "\n",
 59 |     "\n",
 60 |     "def random_date(start, end, prop):\n",
 61 |     "    return str_time_prop(start, end, '%m/%d/%Y', prop)\n",
 62 |     "\n",
 63 |     "print(random_date(\"1/1/1960\", \"1/1/2002\", random.random()))"
 64 |    ]
 65 |   },
 66 |   {
 67 |    "cell_type": "code",
 68 |    "execution_count": 4,
 69 |    "metadata": {},
 70 |    "outputs": [],
 71 |    "source": [
 72 |     "import csv\n",
 73 |     "drugs = list(csv.reader(open('drug/Products.txt', 'r'), delimiter='\\t'))"
 74 |    ]
 75 |   },
 76 |   {
 77 |    "cell_type": "code",
 78 |    "execution_count": null,
 79 |    "metadata": {},
 80 |    "outputs": [],
 81 |    "source": [
 82 |     "import tensorflow as tf\n",
 83 |     "from transformers import TFGPT2LMHeadModel, GPT2Tokenizer\n",
 84 |     "\n",
 85 |     "\n",
 86 |     "tokenizer = GPT2Tokenizer.from_pretrained(\"gpt2\")\n",
 87 |     "\n",
 88 |     "# add the EOS token as PAD token to avoid warnings\n",
 89 |     "model = TFGPT2LMHeadModel.from_pretrained(\"gpt2\", pad_token_id=tokenizer.eos_token_id)"
 90 |    ]
 91 |   },
 92 |   {
 93 |    "cell_type": "code",
 94 |    "execution_count": null,
 95 |    "metadata": {},
 96 |    "outputs": [],
 97 |    "source": [
 98 |     "import os\n",
 99 |     "from fpdf import FPDF\n",
100 |     "import random\n",
101 |     "import names\n",
102 |     "\n",
103 |     "for j in range(1,int(no_of_reports+1)):\n",
104 |     "    pdf = FPDF(format = 'A4')\n",
105 |     "    pdf.add_page()\n",
106 |     "    pdf.set_font('helvetica', '', 13.0)\n",
107 |     "    pdf.set_xy(60,8)\n",
108 |     "    pdf.cell(ln=1, h=22.0, align='C', w=75.0, txt='Discharge Summary (Synthetically generated, contains meaningless text)', border=0)\n",
109 |     "    pdf.set_x(25)\n",
110 |     "    pdf.set_font('helvetica', '', 10.0)\n",
111 |     "    gender = \"male\" if random.random() > 0.5  else \"female\"\n",
112 |     "\n",
113 |     "    pdf.cell(ln=0, h=8.0, align='L', w=75, txt='Name', border=1)\n",
114 |     "    pdf.cell(ln=1, h=8.0, align='L', w=75, txt=names.get_full_name(gender=gender), border=1)\n",
115 |     "\n",
116 |     "\n",
117 |     "    pdf.set_x(25)\n",
118 |     "    pdf.cell(ln=0, h=8.0, align='L', w=75, txt='Birth Date', border=1)\n",
119 |     "    pdf.cell(ln=1, h=8.0, align='L', w=75, txt=random_date(\"1/1/1960\", \"1/1/2002\", random.random()), border=1)\n",
120 |     "\n",
121 |     "    pdf.set_x(25)\n",
122 |     "    pdf.cell(ln=0, h=8.0, align='L', w=75, txt='Gender', border=1)\n",
123 |     "    pdf.cell(ln=1, h=8.0, align='L', w=75, txt=gender, border=1)\n",
124 |     "\n",
125 |     "    pdf.set_x(25)\n",
126 |     "    pdf.cell(ln=0, h=8.0, align='L', w=75, txt='Post Code', border=1)\n",
127 |     "    pdf.cell(ln=1, h=8.0, align='L', w=75, txt=str(int(4000*random.random())), border=1)\n",
128 |     "\n",
129 |     "    pdf.dashed_line(20, 65, 180, 65, dash_length = 1, space_length = 1)\n",
130 |     "\n",
131 |     "    pdf.set_xy(25,70)\n",
132 |     "    pdf.cell(ln=0, h=8.0, align='L', w=75, txt='Admission Date', border=1)\n",
133 |     "    pdf.cell(ln=1, h=8.0, align='L', w=75, txt=random_date(\"01/01/2020\", \"01/01/2020\", random.random()), border=1)\n",
134 |     "\n",
135 |     "\n",
136 |     "    pdf.set_x(25)\n",
137 |     "    pdf.cell(ln=0, h=8.0, align='L', w=75, txt='Discharge Date', border=1)\n",
138 |     "    pdf.cell(ln=1, h=8.0, align='L', w=75, txt=random_date(\"01/02/2020\", \"02/01/2020\", random.random()), border=1)\n",
139 |     "\n",
140 |     "    pdf.dashed_line(20, 90, 180, 90, dash_length = 1, space_length = 1)\n",
141 |     "    pdf.set_x(60)\n",
142 |     "    pdf.set_font('helvetica', '', 13.0)\n",
143 |     "    pdf.cell(ln=1, h=13.0, align='C', w=75.0, txt='Medications', border=0)\n",
144 |     "\n",
145 |     "    pdf.set_font('helvetica', '', 10.0)\n",
146 |     "\n",
147 |     "    for i in range(1,5):\n",
148 |     "        pdf.set_x(25)\n",
149 |     "        pdf.cell(ln=1, h=4.0, align='L', w=120, txt=drugs[int(random.random()*41070)+1][5], border=1)\n",
150 |     "\n",
151 |     "    pdf.set_font('helvetica', '', 10.0)\n",
152 |     "\n",
153 |     "    pdf.set_line_width(0.0)\n",
154 |     "    pdf.rect(15.0, 15.0, 170.0, 245.0)\n",
155 |     "\n",
156 |     "\n",
157 |     "    codes = list(['I30','I31','I32','I33','I34','I35','I36','I37','I38','I39',\n",
158 |     "                 'H05','H10','H12','H54','H34','H22','H44','H01','H24','H55',\n",
159 |     "                 'M05','M10','M12','M54','M34','M22','M44','M01','M24','M55'])\n",
160 |     "    import icd10\n",
161 |     "\n",
162 |     "    for i in range(1,3):\n",
163 |     "        code = icd10.find(codes[int(random.random()*28)])\n",
164 |     "        if not code:\n",
165 |     "            continue\n",
166 |     "        print(code.description)\n",
167 |     "        input_ids = tokenizer.encode('The patient was diagnosed with ' + code.description +\" and showed symptoms of\", return_tensors='tf')\n",
168 |     "        sample_outputs = model.generate(input_ids,do_sample=True, max_length=100, top_k=100, top_p=0.85, num_return_sequences=1)\n",
169 |     "        output = tokenizer.decode(sample_outputs[0], skip_special_tokens=True).replace(\"\\r\\n\", \"\").replace('\\n\\n', '').replace('\\n', '')\n",
170 |     "        pdf.set_xy(25,65+(i*60))\n",
171 |     "        pdf.multi_cell( h=4.0, align='L', w=160, txt=output, border=0)\n",
172 |     "        input_ids = tokenizer.encode('The patient was given  ' + drugs[int(random.random()*41070)+1][5], return_tensors='tf')\n",
173 |     "        sample_outputs = model.generate(input_ids,do_sample=True, max_length=100, top_k=100, top_p=0.85, num_return_sequences=1)\n",
174 |     "        output = tokenizer.decode(sample_outputs[0], skip_special_tokens=True).replace(\"\\r\\n\", \"\").replace('\\n\\n', '').replace('\\n', '')\n",
175 |     "        pdf.set_xy(25,100+(i*60))\n",
176 |     "        pdf.multi_cell( h=4.0, align='L', w=160, txt=output, border=0)\n",
177 |     "    pdf.output(f\"./{j}_summary.pdf\", 'F')\n"
178 |    ]
179 |   },
180 |   {
181 |    "cell_type": "code",
182 |    "execution_count": null,
183 |    "metadata": {},
184 |    "outputs": [],
185 |    "source": []
186 |   }
187 |  ],
188 |  "metadata": {
189 |   "kernelspec": {
190 |    "display_name": "conda_python3",
191 |    "language": "python",
192 |    "name": "conda_python3"
193 |   },
194 |   "language_info": {
195 |    "codemirror_mode": {
196 |     "name": "ipython",
197 |     "version": 3
198 |    },
199 |    "file_extension": ".py",
200 |    "mimetype": "text/x-python",
201 |    "name": "python",
202 |    "nbconvert_exporter": "python",
203 |    "pygments_lexer": "ipython3",
204 |    "version": "3.6.10"
205 |   }
206 |  },
207 |  "nbformat": 4,
208 |  "nbformat_minor": 4
209 | }
210 | 


--------------------------------------------------------------------------------
/.ipynb_checkpoints/report-checkpoint.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "!pip3 install fpdf\n",
 10 |     "!pip3 install names\n",
 11 |     "!pip3 install icd10-cm\n",
 12 |     "!pip3 install transformers\n",
 13 |     "!pip3 install tensorflow==2.1"
 14 |    ]
 15 |   },
 16 |   {
 17 |    "cell_type": "code",
 18 |    "execution_count": 7,
 19 |    "metadata": {},
 20 |    "outputs": [],
 21 |    "source": [
 22 |     "no_of_reports = 10"
 23 |    ]
 24 |   },
 25 |   {
 26 |    "cell_type": "code",
 27 |    "execution_count": 2,
 28 |    "metadata": {},
 29 |    "outputs": [
 30 |     {
 31 |      "name": "stdout",
 32 |      "output_type": "stream",
 33 |      "text": [
 34 |       "03/24/1972\n"
 35 |      ]
 36 |     }
 37 |    ],
 38 |    "source": [
 39 |     "#random dates\n",
 40 |     "import random\n",
 41 |     "import time\n",
 42 |     "\n",
 43 |     "def str_time_prop(start, end, format, prop):\n",
 44 |     "    \"\"\"Get a time at a proportion of a range of two formatted times.\n",
 45 |     "\n",
 46 |     "    start and end should be strings specifying times formatted in the\n",
 47 |     "    given format (strftime-style), giving an interval [start, end].\n",
 48 |     "    prop specifies the proportion of the interval to be taken after\n",
 49 |     "    start.  The returned time will be in the specified format.\n",
 50 |     "    \"\"\"\n",
 51 |     "\n",
 52 |     "    stime = time.mktime(time.strptime(start, format))\n",
 53 |     "    etime = time.mktime(time.strptime(end, format))\n",
 54 |     "\n",
 55 |     "    ptime = stime + prop * (etime - stime)\n",
 56 |     "\n",
 57 |     "    return time.strftime(format, time.localtime(ptime))\n",
 58 |     "\n",
 59 |     "\n",
 60 |     "def random_date(start, end, prop):\n",
 61 |     "    return str_time_prop(start, end, '%m/%d/%Y', prop)\n",
 62 |     "\n",
 63 |     "print(random_date(\"1/1/1960\", \"1/1/2002\", random.random()))"
 64 |    ]
 65 |   },
 66 |   {
 67 |    "cell_type": "code",
 68 |    "execution_count": 3,
 69 |    "metadata": {},
 70 |    "outputs": [],
 71 |    "source": [
 72 |     "import csv\n",
 73 |     "drugs = list(csv.reader(open('drug/Products.txt', 'r'), delimiter='\\t'))"
 74 |    ]
 75 |   },
 76 |   {
 77 |    "cell_type": "code",
 78 |    "execution_count": 4,
 79 |    "metadata": {},
 80 |    "outputs": [
 81 |     {
 82 |      "data": {
 83 |       "application/vnd.jupyter.widget-view+json": {
 84 |        "model_id": "4b348cf4465242538ad0c5e2188c6556",
 85 |        "version_major": 2,
 86 |        "version_minor": 0
 87 |       },
 88 |       "text/plain": [
 89 |        "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1042301.0, style=ProgressStyle(descript…"
 90 |       ]
 91 |      },
 92 |      "metadata": {},
 93 |      "output_type": "display_data"
 94 |     },
 95 |     {
 96 |      "name": "stdout",
 97 |      "output_type": "stream",
 98 |      "text": [
 99 |       "\n"
100 |      ]
101 |     },
102 |     {
103 |      "data": {
104 |       "application/vnd.jupyter.widget-view+json": {
105 |        "model_id": "6df689b618b34e4785cb178774ae670c",
106 |        "version_major": 2,
107 |        "version_minor": 0
108 |       },
109 |       "text/plain": [
110 |        "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=456318.0, style=ProgressStyle(descripti…"
111 |       ]
112 |      },
113 |      "metadata": {},
114 |      "output_type": "display_data"
115 |     },
116 |     {
117 |      "name": "stdout",
118 |      "output_type": "stream",
119 |      "text": [
120 |       "\n"
121 |      ]
122 |     },
123 |     {
124 |      "data": {
125 |       "application/vnd.jupyter.widget-view+json": {
126 |        "model_id": "c837c2a6994b4fcc8d1d2663cc73a375",
127 |        "version_major": 2,
128 |        "version_minor": 0
129 |       },
130 |       "text/plain": [
131 |        "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=665.0, style=ProgressStyle(description_…"
132 |       ]
133 |      },
134 |      "metadata": {},
135 |      "output_type": "display_data"
136 |     },
137 |     {
138 |      "name": "stdout",
139 |      "output_type": "stream",
140 |      "text": [
141 |       "\n"
142 |      ]
143 |     },
144 |     {
145 |      "data": {
146 |       "application/vnd.jupyter.widget-view+json": {
147 |        "model_id": "234ef9a023eb41e6855cdd52ec9668a0",
148 |        "version_major": 2,
149 |        "version_minor": 0
150 |       },
151 |       "text/plain": [
152 |        "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=497933648.0, style=ProgressStyle(descri…"
153 |       ]
154 |      },
155 |      "metadata": {},
156 |      "output_type": "display_data"
157 |     },
158 |     {
159 |      "name": "stdout",
160 |      "output_type": "stream",
161 |      "text": [
162 |       "\n"
163 |      ]
164 |     },
165 |     {
166 |      "name": "stderr",
167 |      "output_type": "stream",
168 |      "text": [
169 |       "All model checkpoint weights were used when initializing TFGPT2LMHeadModel.\n",
170 |       "\n",
171 |       "All the weights of TFGPT2LMHeadModel were initialized from the model checkpoint at gpt2.\n",
172 |       "If your task is similar to the task the model of the ckeckpoint was trained on, you can already use TFGPT2LMHeadModel for predictions without further training.\n"
173 |      ]
174 |     }
175 |    ],
176 |    "source": [
177 |     "import tensorflow as tf\n",
178 |     "from transformers import TFGPT2LMHeadModel, GPT2Tokenizer\n",
179 |     "\n",
180 |     "\n",
181 |     "tokenizer = GPT2Tokenizer.from_pretrained(\"gpt2\")\n",
182 |     "\n",
183 |     "# add the EOS token as PAD token to avoid warnings\n",
184 |     "model = TFGPT2LMHeadModel.from_pretrained(\"gpt2\", pad_token_id=tokenizer.eos_token_id)"
185 |    ]
186 |   },
187 |   {
188 |    "cell_type": "code",
189 |    "execution_count": 10,
190 |    "metadata": {},
191 |    "outputs": [
192 |     {
193 |      "name": "stdout",
194 |      "output_type": "stream",
195 |      "text": [
196 |       "Conjunctivitis\n",
197 |       "Disorders of globe\n",
198 |       "Acute pericarditis\n",
199 |       "Other diseases of pericardium\n"
200 |      ]
201 |     }
202 |    ],
203 |    "source": [
204 |     "import os\n",
205 |     "from fpdf import FPDF\n",
206 |     "import random\n",
207 |     "import names\n",
208 |     "\n",
209 |     "for j in range(1,int(no_of_reports+1)):\n",
210 |     "    pdf = FPDF(format = 'A4')\n",
211 |     "    pdf.add_page()\n",
212 |     "    pdf.set_font('helvetica', '', 13.0)\n",
213 |     "    pdf.set_xy(60,8)\n",
214 |     "    pdf.cell(ln=1, h=22.0, align='C', w=75.0, txt='Discharge Summary (Synthetically generated, contains meaningless text)', border=0)\n",
215 |     "    pdf.set_x(25)\n",
216 |     "    pdf.set_font('helvetica', '', 10.0)\n",
217 |     "    gender = \"male\" if random.random() > 0.5  else \"female\"\n",
218 |     "\n",
219 |     "    pdf.cell(ln=0, h=8.0, align='L', w=75, txt='Name', border=1)\n",
220 |     "    pdf.cell(ln=1, h=8.0, align='L', w=75, txt=names.get_full_name(gender=gender), border=1)\n",
221 |     "\n",
222 |     "\n",
223 |     "    pdf.set_x(25)\n",
224 |     "    pdf.cell(ln=0, h=8.0, align='L', w=75, txt='Birth Date', border=1)\n",
225 |     "    pdf.cell(ln=1, h=8.0, align='L', w=75, txt=random_date(\"1/1/1960\", \"1/1/2002\", random.random()), border=1)\n",
226 |     "\n",
227 |     "    pdf.set_x(25)\n",
228 |     "    pdf.cell(ln=0, h=8.0, align='L', w=75, txt='Gender', border=1)\n",
229 |     "    pdf.cell(ln=1, h=8.0, align='L', w=75, txt=gender, border=1)\n",
230 |     "\n",
231 |     "    pdf.set_x(25)\n",
232 |     "    pdf.cell(ln=0, h=8.0, align='L', w=75, txt='Post Code', border=1)\n",
233 |     "    pdf.cell(ln=1, h=8.0, align='L', w=75, txt=str(int(4000*random.random())), border=1)\n",
234 |     "\n",
235 |     "    pdf.dashed_line(20, 65, 180, 65, dash_length = 1, space_length = 1)\n",
236 |     "\n",
237 |     "    pdf.set_xy(25,70)\n",
238 |     "    pdf.cell(ln=0, h=8.0, align='L', w=75, txt='Admission Date', border=1)\n",
239 |     "    pdf.cell(ln=1, h=8.0, align='L', w=75, txt=random_date(\"01/01/2020\", \"01/01/2020\", random.random()), border=1)\n",
240 |     "\n",
241 |     "\n",
242 |     "    pdf.set_x(25)\n",
243 |     "    pdf.cell(ln=0, h=8.0, align='L', w=75, txt='Discharge Date', border=1)\n",
244 |     "    pdf.cell(ln=1, h=8.0, align='L', w=75, txt=random_date(\"01/02/2020\", \"02/01/2020\", random.random()), border=1)\n",
245 |     "\n",
246 |     "    pdf.dashed_line(20, 90, 180, 90, dash_length = 1, space_length = 1)\n",
247 |     "    pdf.set_x(60)\n",
248 |     "    pdf.set_font('helvetica', '', 13.0)\n",
249 |     "    pdf.cell(ln=1, h=13.0, align='C', w=75.0, txt='Medications', border=0)\n",
250 |     "\n",
251 |     "    pdf.set_font('helvetica', '', 10.0)\n",
252 |     "\n",
253 |     "    for i in range(1,5):\n",
254 |     "        pdf.set_x(25)\n",
255 |     "        pdf.cell(ln=1, h=4.0, align='L', w=120, txt=drugs[int(random.random()*41070)+1][5], border=1)\n",
256 |     "\n",
257 |     "    pdf.set_font('helvetica', '', 10.0)\n",
258 |     "\n",
259 |     "    pdf.set_line_width(0.0)\n",
260 |     "    pdf.rect(15.0, 15.0, 170.0, 245.0)\n",
261 |     "\n",
262 |     "\n",
263 |     "    codes = list(['I30','I31','I32','I33','I34','I35','I36','I37','I38','I39',\n",
264 |     "                 'H05','H10','H12','H54','H34','H22','H44','H01','H24','H55',\n",
265 |     "                 'M05','M10','M12','M54','M34','M22','M44','M01','M24','M55'])\n",
266 |     "    import icd10\n",
267 |     "\n",
268 |     "    for i in range(1,3):\n",
269 |     "        code = icd10.find(codes[int(random.random()*28)])\n",
270 |     "        if not code:\n",
271 |     "            continue\n",
272 |     "        print(code.description)\n",
273 |     "        input_ids = tokenizer.encode('The patient was diagnosed with ' + code.description +\" and showed symptoms of\", return_tensors='tf')\n",
274 |     "        sample_outputs = model.generate(input_ids,do_sample=True, max_length=100, top_k=100, top_p=0.85, num_return_sequences=1)\n",
275 |     "        output = tokenizer.decode(sample_outputs[0], skip_special_tokens=True).replace(\"\\r\\n\", \"\").replace('\\n\\n', '').replace('\\n', '')\n",
276 |     "        pdf.set_xy(25,65+(i*60))\n",
277 |     "        pdf.multi_cell( h=4.0, align='L', w=160, txt=output, border=0)\n",
278 |     "        input_ids = tokenizer.encode('The patient was given  ' + drugs[int(random.random()*41070)+1][5], return_tensors='tf')\n",
279 |     "        sample_outputs = model.generate(input_ids,do_sample=True, max_length=100, top_k=100, top_p=0.85, num_return_sequences=1)\n",
280 |     "        output = tokenizer.decode(sample_outputs[0], skip_special_tokens=True).replace(\"\\r\\n\", \"\").replace('\\n\\n', '').replace('\\n', '')\n",
281 |     "        pdf.set_xy(25,100+(i*60))\n",
282 |     "        pdf.multi_cell( h=4.0, align='L', w=160, txt=output, border=0)\n",
283 |     "    pdf.output(f\"./{j}_summary.pdf\", 'F')\n"
284 |    ]
285 |   },
286 |   {
287 |    "cell_type": "code",
288 |    "execution_count": null,
289 |    "metadata": {},
290 |    "outputs": [],
291 |    "source": []
292 |   }
293 |  ],
294 |  "metadata": {
295 |   "kernelspec": {
296 |    "display_name": "conda_python3",
297 |    "language": "python",
298 |    "name": "conda_python3"
299 |   },
300 |   "language_info": {
301 |    "codemirror_mode": {
302 |     "name": "ipython",
303 |     "version": 3
304 |    },
305 |    "file_extension": ".py",
306 |    "mimetype": "text/x-python",
307 |    "name": "python",
308 |    "nbconvert_exporter": "python",
309 |    "pygments_lexer": "ipython3",
310 |    "version": "3.6.10"
311 |   }
312 |  },
313 |  "nbformat": 4,
314 |  "nbformat_minor": 4
315 | }
316 | 


--------------------------------------------------------------------------------