├── .gitignore ├── LICENSE ├── README.md ├── configs ├── example_config_for_detaset_creation.yaml └── example_train_config.yaml ├── data ├── example_project_data │ ├── prepared_generated_data_for_example_project.csv │ └── raw_generated_data_for_example_project.csv ├── medical_tasks_gpt4 │ └── prepared_generated_data_for_medical_tasks.csv ├── nhs_conditions_small_sample │ ├── data_split_by_length.csv │ └── original_data.csv ├── nhs_uk_full │ ├── prepared_generated_data_for_nhs_uk_conversations.csv │ └── prepared_generated_data_for_nhs_uk_qa.csv └── prompts.json ├── experiments ├── Dataset Generation.ipynb ├── Prompt Creation.ipynb └── Supervised Training.ipynb ├── llama_train_requirements.txt ├── opengpt ├── config.py ├── data_collator.py ├── dataset_utils.py ├── model_utils.py ├── parsers.py ├── prompt_utils.py └── teachers.py ├── requirements.txt └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | #Directories to be ignored fully 2 | /books/ 3 | /articles/ 4 | /other/ 5 | /output/ 6 | /graphics/ 7 | models/ 8 | static/ 9 | dist/ 10 | tmp/ 11 | logs/ 12 | results/ 13 | wandb/ 14 | *_tmp/ 15 | *.egg-info/ 16 | build/ 17 | .idea 18 | venv 19 | db.sqlite3 20 | .ipynb_checkpoints/ 21 | opengpt.code-workspace 22 | 23 | #tmp and similar files 24 | .nfs* 25 | *.log 26 | *.pyc 27 | *.out 28 | *.swp 29 | *.swn 30 | tmp_* 31 | t_* 32 | tmp_* 33 | *_tmp 34 | *.swo 35 | *.lyx.emergency 36 | *.lyx# 37 | *~ 38 | *hidden* 39 | nohup.out 40 | tmp.py 41 | .DS_Store 42 | *.lock 43 | 44 | # models files 45 | *.dat 46 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 
8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. 
For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # OpenGPT 2 | 3 | A framework for creating grounded instruction based datasets and training conversational domain expert Large Language Models (LLMs). 4 | 5 | Learn more in our blog: [AI for Healthcare | Introducing OpenGPT](https://aiforhealthcare.substack.com/p/a-large-language-model-for-healthcare). 6 | 7 |

8 | 9 |

10 | 11 | ## NHS-LLM 12 | A conversational model for healthcare trained using OpenGPT. All the medical datasets used to train this model were created using OpenGPT and are available below. 13 | 14 | ## Available datasets 15 | - NHS UK Q/A, 24,665 question and answer pairs, Prompt used: f53cf99826, Generated via OpenGPT using data available on the [NHS UK Website](https://www.nhs.uk/conditions/). Download [here](./data/nhs_uk_full/prepared_generated_data_for_nhs_uk_qa.csv) 16 | - NHS UK Conversations, 2,354 unique conversations, Prompt used: f4df95ec69, Generated via OpenGPT using data available on the [NHS UK Website](https://www.nhs.uk/conditions/). Download [here](./data/nhs_uk_full/prepared_generated_data_for_nhs_uk_conversations.csv) 17 | - Medical Task/Solution, 4,688 pairs generated via OpenGPT using GPT-4, prompt used: 5755564c19. Download [here](./data/medical_tasks_gpt4/prepared_generated_data_for_medical_tasks.csv) 18 | 19 | All datasets are in the `/data` folder. 20 | 21 | ## Installation 22 | ``` 23 | pip install opengpt 24 | ``` 25 | If you are working with LLaMA models, you will also need some extra requirements: 26 | ``` 27 | pip install -r ./llama_train_requirements.txt 28 | ``` 29 | 30 | ## Tutorials 31 | 32 | - Making a mini conversational LLM for healthcare, [Google Colab - OpenGPT | The making of Dum-E](https://colab.research.google.com/drive/1GQj9dwBSCmzEh1PmbRlQQYlojCvOG-qG?usp=sharing) 33 | 34 | 35 | ## How to 36 | 37 | 1. We start by collecting a base dataset in a certain domain. For example, collect definitions of all diseases (e.g. from [NHS UK](https://www.nhs.uk/conditions/)). You can find a small sample dataset [here](https://github.com/CogStack/OpenGPT/blob/main/data/nhs_conditions_small_sample/original_data.csv). It is important that the collected dataset has a column named `text` where each row of the CSV has one disease definition. 38 | 39 | 2. 
Find a prompt matching your use case in the [prompt database](https://github.com/CogStack/OpenGPT/blob/main/data/prompts.json), or create a new prompt using the [Prompt Creation Notebook](https://github.com/CogStack/OpenGPT/blob/main/experiments/Prompt%20Creation.ipynb). A prompt will be used to generate tasks/solutions based on the `context` (the dataset collected in step 1). 40 | - Edit the config file for dataset generation and add the appropriate prompts and datasets ([example config file](https://github.com/CogStack/OpenGPT/blob/main/configs/example_config_for_detaset_creation.yaml)). 41 | - Run the Dataset generation notebook ([link](https://github.com/CogStack/OpenGPT/blob/main/experiments/Dataset%20Generation.ipynb)) 42 | 43 | 3. Edit the [train_config](https://github.com/CogStack/OpenGPT/blob/main/configs/example_train_config.yaml) file and add the datasets you want to use for training. 44 | 4. Use the [train notebook](https://github.com/CogStack/OpenGPT/blob/main/experiments/Supervised%20Training.ipynb) or run the training scripts to train a model on the new dataset you created. 45 | 46 | **If you have any questions please check out [discourse](https://discourse.cogstack.org/)** 47 | 48 | ## More Examples 49 | 50 |

51 | 52 |

53 | 54 | 55 |

56 | 57 |

58 | 59 |

60 | 61 |

62 | 63 | 64 | -------------------------------------------------------------------------------- /configs/example_config_for_detaset_creation.yaml: -------------------------------------------------------------------------------- 1 | #If starting a new project, please copy this config and change the `name` and set the `base_path`. 2 | # You can also remove things that you do not need or just leave them blank. 3 | #If you are using this to generate a dataset, then configure the `datasets`, `teacher` and `prompts` parameters. 4 | name: 'example_project_data' 5 | base_path: '../data/' # Where the newly created datasets, interim files and everything else will be saved 6 | to_box: True # Should all properties of the config class be converted to Box, box makes properties accessible with a . (e.g. config.name, instead of config['name']) 7 | special_tokens: 8 | user: "<|user|>" # For chat like interactions we want to have a user and an AI token 9 | ai: "<|ai|>" # See above 10 | eos: "<|eos|>" # End of stream (one question, or one answer, or one message) 11 | eod: "<|eod|>" # End of document, or conversation - in other words the text that comes after this token is not related to the text before it 12 | pad: "<|pad|>" # Padding 13 | teacher: 14 | name: 'openai' # Has to be one of the available teachers in opengpt/teachers.py 15 | max_len: 2560 # Max length of text in tokens (by tiktoken) to send to OpenAI, usually 3/4 of the max length, longer sequences will be split 16 | min_len: 10 # The minimum length of the context in words, if less an example will be skipped 17 | model: 'gpt-3.5-turbo' # Model to be used as teacher (gpt-4 or gpt-3.5-turbo for openai) 18 | static_paths: 19 | prompt_db: "../data/prompts.json" # Where is the prompt database located 20 | data_generation_checkpoint_every: 5 # When querying the teacher, after this many queries a checkpoint will be saved on disk 21 | datasets: 22 | # All datasets to be used to generate grounded instruction-based datasets. 
Every dataset (CSV) has to have a `text` column that 23 | # will be sent to the Teacher as context (chatgpt, gpt-4, ...): 24 | # name - the name to be used for this dataset, this name is used to reference this dataset in prompts 25 | # path - where is the csv 26 | # nrows - how many rows from the csv should be processed, usually used for testing only, -1 or None if all rows should be processed 27 | - name: "nhs_conditions_small_sample" 28 | path: "../data/nhs_conditions_small_sample/original_data.csv" 29 | nrows: -1 30 | prompts: 31 | - hashes: [f53cf99826, f4df95ec69] # Hashes of prompts to be used 32 | languages: ["English", "French"] # Some prompts have a {language} field, so this will be used to populate it 33 | random_prompt: True # If True, for each example in the datasets a random prompt will be picked from `hashes`, otherwise all prompts will be used sequentially 34 | datasets: ["nhs_conditions_small_sample"] # Datasets to be used with the prompt hashes above, name of the dataset has to match what is defined in `datasets` 35 | runs: 2 # How many iterations to do, so if we put 5 we will send each document from the `datasets` 5 times to the Teacher (e.g. ChatGPT) 36 | extra_parameters: # Extra parameters that the prompt might require 37 | quantity: 10 38 | -------------------------------------------------------------------------------- /configs/example_train_config.yaml: -------------------------------------------------------------------------------- 1 | #If starting a new project, please copy this config and change the `name` and set the `base_path`. 2 | # You can also remove things that you do not need or just leave them blank. 3 | #If you are using this to generate a dataset, then configure the `datasets`, `openai` and `prompts` parameters. 
4 | name: 'example_project_train' 5 | base_path: '../data/' # Where the newly created datasets, interim files and everything else will be saved 6 | to_box: True # Should all properties of the config class be converted to Box, box makes properties accessible with a . (e.g. config.name, instead of config['name']) 7 | special_tokens: 8 | user: "<|user|>" # For chat like interactions we want to have a user and an AI token 9 | ai: "<|ai|>" # See above 10 | eos: "<|eos|>" # End of stream (one question, or one answer, or one message) 11 | eod: "<|eod|>" # End of document, or conversation - in other words the text that comes after this token is not related to the text before it 12 | pad: "<|pad|>" # Padding 13 | test: 14 | dataset: "" # If you have one 15 | train: # Training parameters 16 | model: 'olm/olm-gpt2-oct-2022' # This model can be used for testing, but the performance will not be the best (we need bigger models) 17 | # The models below require bigger GPUs, usually at least one A100 (80GB) or more smaller GPUs 18 | #model: 'stabilityai/stablelm-base-alpha-3b' 19 | #model: '/llama-hf/7B' 20 | #model: 'facebook/opt-1.3b' 21 | datasets: # One or more datasets to be used for training, the csvs have to have the same columns 22 | - "../data/example_project_data/prepared_generated_data_for_example_project.csv" 23 | - "../data/nhs_uk_full/prepared_generated_data_for_nhs_uk_qa.csv" 24 | - "../data/nhs_uk_full/prepared_generated_data_for_nhs_uk_conversations.csv" 25 | - "../data/medical_tasks_gpt4/prepared_generated_data_for_medical_tasks.csv" 26 | ignore_index: -100 # This will be added as label if we want to skip something 27 | max_seq_len: 512 # Should match the model's max seq len, or be smaller 28 | packing_type: 'partial' # one of 'partial', 'full' or 'none' - IMPORTANT, but experimental, Full/Partial will speed up the training drastically (2-3x) 29 | shuffle_dataset: True # Will shuffle the dataset after loading, usually better not to do this and during data preparation make sure 
your dataset is in the right shape 30 | hf_training_arguments: 31 | output_dir: '../data/results/' 32 | gradient_accumulation_steps: 16 # Aim for a batch size of 128, formula is: n_dev * batch_size * acc_steps 33 | per_device_eval_batch_size: 1 34 | per_device_train_batch_size: 1 35 | load_best_model_at_end: False 36 | learning_rate: 2.0e-5 # Use float with 'e-x' notation 37 | weight_decay: 0.1 38 | adam_beta1: 0.9 39 | adam_beta2: 0.95 40 | adam_epsilon: 1.0e-7 41 | max_grad_norm: 1 42 | num_train_epochs: 1 43 | lr_scheduler_type: 'cosine' 44 | warmup_ratio: 0.03 45 | logging_strategy: 'steps' 46 | logging_steps: 100 47 | save_strategy: "steps" 48 | save_steps: 30000 49 | seed: 11 50 | optim: 'adamw_hf' 51 | do_eval: False 52 | #bf16: True # Enable if supported by your GPUs 53 | #tf32: True 54 | #fsdp: "full_shard auto_wrap" # Enable for distributed training 55 | #fsdp_transformer_layer_cls_to_wrap: "LlamaDecoderLayer" 56 | -------------------------------------------------------------------------------- /data/nhs_conditions_small_sample/data_split_by_length.csv: -------------------------------------------------------------------------------- 1 | ,text,url,len,part 2 | 0,"Overview 3 | High blood pressure (hypertension) 4 | High blood pressure, or hypertension, rarely has noticeable symptoms. But if untreated, it increases your risk of serious problems such as heart attacks and strokes. 5 | Around a third of adults in the UK have high blood pressure, although many will not realise it. 6 | The only way to find out if your blood pressure is high is to have your blood pressure checked. 7 | What is high blood pressure? 8 | Blood pressure is recorded with 2 numbers. The systolic pressure (higher number) is the force at which your heart pumps blood around your body. 9 | The diastolic pressure (lower number) is the resistance to the blood flow in the blood vessels. 10 | They're both measured in millimetres of mercury (mmHg). 
11 | As a general guide: 12 | high blood pressure is considered to be from 140/90mmHg (or an average of 135/85mmHg at home) – or 150/90mmHg (or an average of 145/85mmHg at home) if you're over the age of 80 13 | ideal blood pressure is usually considered to be between 90/60mmHg and 120/80mmHg, while the target for over-80s is below 150/90mmHg (or 145/85mmHg at home) 14 | Blood pressure readings between 120/80mmHg and 140/90mmHg could mean you're at risk of developing high blood pressure if you do not take steps to keep your blood pressure under control. 15 | Everyone's blood pressure will be slightly different. What's considered low or high for you may be normal for someone else. 16 | Risks of high blood pressure 17 | If your blood pressure is too high, it puts extra strain on your blood vessels, heart and other organs, such as the brain, kidneys and eyes. 18 | Persistent high blood pressure can increase your risk of a number of serious and potentially life-threatening health conditions, such as: 19 | heart disease 20 | heart attacks 21 | strokes 22 | heart failure 23 | peripheral arterial disease 24 | aortic aneurysms 25 | kidney disease 26 | vascular dementia 27 | If you have high blood pressure, reducing it even a small amount can help lower your risk of these health conditions. 28 | Check your blood pressure 29 | The only way of knowing whether you have high blood pressure is to have a blood pressure test. 30 | All adults over 40 are advised to have their blood pressure checked at least every 5 years. 31 | Getting this done is easy and could save your life. 32 | You can get your blood pressure tested at a number of places, including: 33 | at your GP surgery 34 | at some pharmacies 35 | as part of your NHS Health Check 36 | in some workplaces 37 | You can also check your blood pressure yourself with a home blood pressure monitor. 
38 | Find out more about getting a blood pressure test 39 | Things that can increase your risk of getting high blood pressure 40 | It's not always clear what causes high blood pressure, but there are things that can increase your risk. 41 | You might be more at risk if you: 42 | are overweight 43 | eat too much salt and do not eat enough fruit and vegetables 44 | do not do enough exercise 45 | drink too much alcohol or coffee (or other caffeine-based drinks) 46 | smoke 47 | do not get much sleep or have disturbed sleep 48 | are over 65 49 | have a relative with high blood pressure 50 | are of black African or black Caribbean descent 51 | live in a deprived area 52 | Making healthy lifestyle changes can sometimes help reduce your chances of getting high blood pressure and help lower your blood pressure if it's already high. 53 | Treatment for high blood pressure 54 | Doctors can help you keep your blood pressure to a safe level using: 55 | lifestyle changes 56 | medicines 57 | What works best is different for each person. 58 | Talk to your doctor to help you decide about treatment. 59 | This patient decision aid (PDF, 132kb) can also help you to understand your treatment options. 60 | Lifestyle changes to reduce blood pressure 61 | These lifestyle changes can help prevent and lower high blood pressure: 62 | reduce the amount of salt you eat and have a generally healthy diet 63 | cut back on alcohol 64 | lose weight if you're overweight 65 | exercise regularly 66 | cut down on caffeine 67 | stop smoking 68 | Some people with high blood pressure may also need to take 1 or more medicines to stop their blood pressure getting too high. 69 | Medicines for high blood pressure 70 | If you're diagnosed with high blood pressure, your doctor may recommend taking 1 or more medicines to keep it under control. 71 | These come as tablets and usually need to be taken once a day. 
72 | Common blood pressure medicines include: 73 | ACE inhibitors – such as enalapril, lisinopril, perindopril and ramipril 74 | angiotensin-2 receptor blockers (ARBs) – such as candesartan, irbesartan, losartan, valsartan and olmesartan 75 | calcium channel blockers – such as amlodipine, felodipine and nifedipine or diltiazem and verapamil 76 | diuretics – such as indapamide and bendroflumethiazide 77 | beta blockers – such as atenolol and bisoprolol 78 | alpha blockers – such as doxazosin 79 | other diuretics – such as amiloride and spironolactone 80 | The medicine recommended for you will depend on things like how high your blood pressure is, your age and your ethnicity.",https://www.nhs.uk/conditions/Blood-pressure-(high)/Pages/Introduction.aspx,1135,part_0 81 | 1,"Bronchiolitis 82 | Bronchiolitis is a common chest infection that affects babies and children under 2. It's usually mild and can be treated at home, but it can be serious. 83 | Bronchiolitis is different from bronchitis, which causes a cough with lots of mucus and can affect people of all ages. 84 | Check if it's bronchiolitis 85 | The early symptoms of bronchiolitis are similar to a cold, such as sneezing, a runny or blocked nose, a cough and a slightly high temperature of 38C. 86 | A child with bronchiolitis may then get other symptoms, such as: 87 | breathing more quickly 88 | finding it difficult to feed or eat 89 | noisy breathing (wheezing) 90 | becoming irritable 91 | Symptoms are usually worst between days 3 and 5, and the cough usually gets better in 3 weeks. 
92 | Immediate action required: 93 | Call 999 or go to A&E if: 94 | your child is having difficulty breathing – you may notice grunting noises or their tummy sucking under their ribs 95 | there are pauses when your child breathes 96 | your child's skin, tongue or lips are blue 97 | your child is floppy and will not wake up or stay awake 98 | As a parent, you may know if your child seems seriously unwell and should trust your own judgement. 99 | Find your nearest A&E 100 | Urgent advice: 101 | Ask for an urgent GP appointment or call 111 if: 102 | your child has had a cold and it's getting worse 103 | your child is feeding or eating much less than normal 104 | your child has had a dry nappy for 12 hours or more, or shows other signs of dehydration 105 | your baby is under 3 months and has a temperature of 38C, or is older than 3 months and has a temperature of 39C or higher 106 | your baby feels hotter than usual when you touch their back or chest, or feels sweaty 107 | your child is very tired or irritable 108 | Treatments for bronchiolitis 109 | There's no specific treatment for bronchiolitis. It usually gets better on its own and you can look after your child at home. 110 | But it can be serious in some children, who may need to be treated in hospital. 
111 | Do 112 | give children's paracetamol to babies and children over 2 months old or ibuprofen to babies and children over 3 months old – but do not give aspirin to a child under 16 113 | try using salt water (saline) drops if your child's nose is blocked 114 | keep your child upright as much as possible when they're awake – this will help them breathe more easily 115 | encourage your child to drink lots of fluids – try smaller feeds more often in babies, and give older children extra water or diluted fruit juice 116 | Don’t 117 | do not smoke around your child 118 | do not try to lower your child's temperature by sponging them with cool water or taking off all their clothes 119 | Preventing bronchiolitis 120 | There are some things you can do to lower the chances of your child getting bronchiolitis or spreading the viruses that cause it, such as: 121 | wash your hands and your child's hands often 122 | wash or wipe down toys and clean surfaces regularly 123 | use disposable tissues and throw them away as soon as you've used them 124 | keep newborn babies away from anyone with a cold or the flu – especially if they're under 2 months old or were premature 125 | It's also important not to smoke around your child. Children who breathe in cigarette smoke have a higher risk of getting bronchiolitis. 126 | Children at risk of severe bronchiolitis 127 | Some children may have a higher risk of getting seriously ill with bronchiolitis. 128 | This includes children who: 129 | were born very prematurely 130 | have a heart or lung condition 131 | have a weakened immune system 132 | These children may be able to have treatment in the winter (between October and March) to stop them getting severe bronchiolitis. 133 | Causes of bronchiolitis 134 | Bronchiolitis is caused by a viral infection, usually the respiratory syncytial virus (RSV). 135 | RSV is very common and spreads easily in coughs and sneezes. Almost all children have had it by the time they're 2. 
136 | In older children and adults, RSV may cause a cough or cold, but in young children it can cause bronchiolitis.",https://www.nhs.uk/conditions/Bronchiolitis/,893,part_0 137 | 2,"Bronchitis 138 | Bronchitis is inflammation of the airways in the lungs that is usually caused by an infection. It often gets better without treatment in around 3 weeks. 139 | Some people have long-term inflammation of the airways in the lungs called chronic bronchitis. This is known as chronic obstructive pulmonary disease (COPD). 140 | Check if you have bronchitis 141 | Symptoms of bronchitis can be similar to a cold or flu. 142 | Symptoms include: 143 | a cough – you may cough up clear, white, yellow or green mucus 144 | chest pain when coughing 145 | shortness of breath 146 | a sore throat 147 | a runny nose 148 | a high temperature 149 | Things you can do to help with bronchitis 150 | There are some things you can do to ease the symptoms of bronchitis and reduce the risk of spreading infections to other people. 
151 | Do 152 | get plenty of rest – try to stay at home and avoid contact with other people if you have a high temperature or do not feel well enough to do your normal activities 153 | drink plenty of fluids 154 | take painkillers like paracetamol or ibuprofen to help with pain and bring down a high temperature 155 | try adding honey to a warm drink to help soothe your throat (do not give honey to babies under 1) 156 | cover your mouth and nose with a tissue when you cough or sneeze – put used tissues in the bin as quickly as possible 157 | wash your hands regularly with water and soap 158 | Don’t 159 | do not smoke 160 | Urgent advice: 161 | Ask for an urgent GP appointment or get help from NHS 111 if: 162 | you've had a cough for more than 3 weeks 163 | you cough up blood or blood-stained mucus 164 | you have chest pain that comes and goes, or when breathing or coughing 165 | you're over 65 166 | you're pregnant 167 | you have a long-term condition, such as diabetes, or a heart, lung or kidney condition 168 | you have a weakened immune system – for example, you have a condition that affects the immune system, or you're having chemotherapy 169 | you feel very unwell 170 | Immediate action required: 171 | Call 999 if: 172 | you are struggling to breathe – you are choking, gasping and unable to speak 173 | you have pale, blue or blotchy skin, lips or tongue – on brown or black skin, this may be easier to see on the lips, tongue or gums, under the nails or around the eyes 174 | you suddenly feel confused – for example, you do not know where you are 175 | you're unable to wake your baby or they feel floppy 176 | Treatments for bronchitis 177 | Bronchitis usually clears up without treatment in around 3 weeks. See a GP if your symptoms last longer than 3 weeks. 
178 | You may need antibiotics if your bronchitis is caused by a bacterial infection.",https://www.nhs.uk/conditions/Bronchitis/,574,part_0 179 | 3,"Steroids 180 | Steroids, also called corticosteroids, are anti-inflammatory medicines used to treat a range of conditions. 181 | They're different from anabolic steroids, which are often used illegally by some people to increase their muscle mass. 182 | Types of steroids 183 | Steroids come in many different forms. 184 | The main types are: 185 | tablets, syrups and liquids – such as prednisolone 186 | inhalers – such as beclometasone and fluticasone 187 | nasal sprays – such as beclometasone and fluticasone 188 | injections (given into joints, muscles or blood vessels) – such as methylprednisolone 189 | creams, lotions and gels – such as hydrocortisone skin cream 190 | Most steroids are only available on prescription, but a few (such as some creams or nasal sprays) can be bought from pharmacies and shops. 191 | Side effects of steroids 192 | Steroids do not tend to cause significant side effects if they're taken for a short time or at a low dose. 193 | But sometimes they can cause unpleasant side effects, such as an increased appetite, mood changes and difficulty sleeping. This is most common with steroid tablets. 194 | The side effects will usually pass once you finish the treatment, but do not stop taking your medicine without speaking to your doctor. Stopping a prescribed course of medicine can cause further unpleasant side effects (withdrawal symptoms). 195 | Read more about: 196 | side effects of steroid tablets 197 | side effects of steroid inhalers 198 | side effects of steroid nasal sprays 199 | side effects of steroid injections 200 | side effects of steroid creams 201 | You can report any suspected side effect to the Yellow Card Scheme. 
202 | Uses for steroids 203 | Steroids can be used to treat a wide range of conditions, including: 204 | asthma and chronic obstructive pulmonary disease (COPD) 205 | hay fever 206 | hives and eczema 207 | painful joints or muscles – such as arthritis, tennis elbow and frozen shoulder 208 | pain caused by an irritated or trapped nerve – such as sciatica 209 | inflammatory bowel disease – such as Crohn's disease 210 | lupus 211 | multiple sclerosis (MS) 212 | How steroids work 213 | Steroids are a man-made version of hormones normally produced by the adrenal glands which are 2 small glands found above the kidneys. 214 | When taken in doses higher than the amount your body normally produces, steroids reduce redness and swelling (inflammation). This can help with inflammatory conditions such as asthma and eczema. 215 | Steroids also reduce the activity of the immune system, which is the body's natural defence against illness and infection. 216 | This can help treat autoimmune conditions, such as rheumatoid arthritis or lupus, which are caused by the immune system mistakenly attacking the body.",https://www.nhs.uk/conditions/Corticosteroid-(drugs)/Pages/Introduction.aspx,587,part_0 217 | 4,"Overview 218 | Creutzfeldt-Jakob disease 219 | Creutzfeldt-Jakob disease (CJD) is a rare and fatal condition that affects the brain. It causes brain damage that worsens rapidly over time. 220 | Symptoms of CJD 221 | Symptoms of CJD include: 222 | loss of intellect and memory 223 | changes in personality 224 | loss of balance and co-ordination 225 | slurred speech 226 | vision problems and blindness 227 | abnormal jerking movements 228 | progressive loss of brain function and mobility 229 | Most people with CJD will die within a year of the symptoms starting, usually from infection. 230 | This is because the immobility caused by CJD can make people with the condition vulnerable to infection. 
231 | Read more about the symptoms of Creutzfeldt-Jakob disease and diagnosing Creutzfeldt-Jakob disease. 232 | What causes CJD? 233 | CJD appears to be caused by an abnormal infectious protein called a prion. These prions accumulate at high levels in the brain and cause irreversible damage to nerve cells. 234 | While the abnormal prions are technically infectious, they're very different from viruses and bacteria. 235 | For example, prions aren't destroyed by the extremes of heat and radiation used to kill bacteria and viruses, and antibiotics or antiviral medicines have no effect on them. 236 | Read more about the causes of Creutzfeldt-Jakob disease. 237 | Types of CJD 238 | There are 4 main types of CJD. 239 | Sporadic CJD 240 | Sporadic CJD is the most common type. 241 | The precise cause of sporadic CJD is unclear, but it's been suggested that a normal brain protein changes abnormally (""misfolds"") and turns into a prion. 242 | Most cases of sporadic CJD occur in adults aged between 45 and 75. On average, symptoms develop between the ages of 60 and 65. 243 | Despite being the most common type of CJD, sporadic CJD is still very rare, affecting only 1 or 2 people in every million each year in the UK. 244 | In 2020, there were 131 recorded deaths from sporadic CJD in the UK. 245 | Variant CJD 246 | Variant CJD (vCJD) is likely to be caused by consuming meat from a cow that had bovine spongiform encephalopathy (BSE, or ""mad cow"" disease), a similar prion disease to CJD. 247 | Since the link between variant CJD and BSE was discovered in 1996, strict controls have proved very effective in preventing meat from infected cattle entering the food chain. 248 | See preventing Creutzfeldt-Jakob disease for more information. 249 | But the average time it takes for the symptoms of variant CJD to occur after initial infection (the incubation period) is still unclear. 
250 | The incubation period could be very long (more than 10 years) in some people, so those exposed to infected meat before the food controls were introduced can still develop variant CJD. 251 | The prion that causes variant CJD can also be transmitted by blood transfusion, although this has only happened 5 times in the UK. 252 | In 2020, there were no recorded deaths from variant CJD in the UK. 253 | Familial or inherited CJD 254 | Familial CJD is a very rare genetic condition where one of the genes a person inherits from their parent (the prion protein gene) carries a mutation that causes prions to form in their brain during adulthood, triggering the symptoms of CJD. 255 | It affects about 1 in every 9 million people in the UK. 256 | The symptoms of familial CJD usually first develop in people when they're in their early 50s. 257 | In 2020, there were 6 deaths from familial CJD and similar inherited prion diseases in the UK. 258 | Iatrogenic CJD 259 | Iatrogenic CJD is where the infection is accidentally spread from someone with CJD through medical or surgical treatment. 260 | For example, a common cause of iatrogenic CJD in the past was growth hormone treatment using human pituitary growth hormones extracted from deceased individuals, some of whom were infected with CJD. 261 | Synthetic versions of human growth hormone have been used since 1985, so this is no longer a risk. 262 | Iatrogenic CJD can also occur if instruments used during brain surgery on a person with CJD aren't properly cleaned between each surgical procedure and are reused on another person. 263 | But increased awareness of these risks means iatrogenic CJD is now very rare. 264 | In 2020, there was 1 death from iatrogenic CJD in the UK caused by receiving human growth hormone before 1985. 265 | How CJD is treated 266 | There's currently no cure for CJD, so treatment aims to relieve symptoms and make the affected person feel as comfortable as possible. 
267 | This can include using medicine such as antidepressants to help with anxiety and depression, and painkillers to relieve pain. 268 | Some people will need nursing care and assistance with feeding. 269 | Read more about treating Creutzfeldt-Jakob disease. 270 | Variant CJD compensation scheme 271 | In October 2001, the government announced a compensation scheme for UK victims of variant CJD. 272 | The vCJD Trust assesses claims and pays compensation to victims and their families.",https://www.nhs.uk/conditions/Creutzfeldt-Jakob-disease/Pages/Introduction.aspx,1103,part_0 273 | 5,"Overview 274 | Atopic eczema 275 | Atopic eczema (atopic dermatitis) is the most common form of eczema, a condition that causes the skin to become itchy, dry and cracked. 276 | Atopic eczema is more common in children, often developing before their first birthday. But it may also develop for the first time in adults. 277 | It's usually a long-term (chronic) condition, although it can improve significantly, or even clear completely, in some children as they get older. 278 | Symptoms of atopic eczema 279 | Atopic eczema causes the skin to become itchy, dry, cracked and sore. 280 | Some people only have small patches of dry skin, but others may experience widespread inflamed skin all over the body. 281 | Inflamed skin can become red on lighter skin, and darker brown, purple or grey on darker skin. This can also be more difficult to see on darker skin. 282 | Although atopic eczema can affect any part of the body, it most often affects the hands, insides of the elbows, backs of the knees and the face and scalp in children. 283 | People with atopic eczema usually have periods when symptoms are less noticeable, as well as periods when symptoms become more severe (flare-ups). 284 | When to seek medical advice 285 | See a GP if you have symptoms of atopic eczema. 
They'll usually be able to diagnose atopic eczema by looking at your skin and asking questions, such as: 286 | whether the rash is itchy and where it appears 287 | when the symptoms first began 288 | whether it comes and goes over time 289 | whether there's a history of atopic eczema in your family 290 | whether you have any other conditions, such as allergies or asthma 291 | whether something in your diet or lifestyle may be contributing to your symptoms 292 | Typically, to be diagnosed with atopic eczema you should have had an itchy skin condition in the last 12 months and 3 or more of the following: 293 | visibly irritated red skin in the creases of your skin – such as the insides of your elbows or behind your knees (or on the cheeks, outsides of elbows, or fronts of the knees in children aged 18 months or under) at the time of examination by a health professional 294 | a history of skin irritation occurring in the same areas mentioned above 295 | generally dry skin in the last 12 months 296 | a history of asthma or hay fever – children under 4 must have an immediate relative, such as a parent, brother or sister, who has 1 of these conditions 297 | the condition started before the age of 2 (this does not apply to children under the age of 4) 298 | Causes of atopic eczema 299 | The exact cause of atopic eczema is unknown, but it's clear it is not down to one single thing. 300 | Atopic eczema often occurs in people who get allergies. ""Atopic"" means sensitivity to allergens. 301 | It can run in families, and often develops alongside other conditions, such as asthma and hay fever. 302 | The symptoms of atopic eczema often have certain triggers, such as soaps, detergents, stress and the weather. 303 | Sometimes food allergies can play a part, especially in young children with severe eczema. 304 | You may be asked to keep a food diary to try to determine whether a specific food makes your symptoms worse. 
305 | Allergy tests are not usually needed, although they're sometimes helpful in identifying whether a food allergy may be triggering symptoms. 306 | Treating atopic eczema 307 | Treatment for atopic eczema can help to relieve the symptoms and many cases improve over time. 308 | But there's currently no cure and severe eczema often has a significant impact on daily life, which may be difficult to cope with physically and mentally. 309 | There's also an increased risk of skin infections. 310 | Many different treatments can be used to control symptoms and manage eczema, including: 311 | self-care techniques, such as reducing scratching and avoiding triggers 312 | emollients (moisturising treatments) – used on a daily basis for dry skin 313 | topical corticosteroids – used to reduce swelling, redness and itching during flare-ups 314 | Other types of eczema 315 | Eczema is the name for a group of skin conditions that cause dry, irritated skin. 316 | Other types of eczema include: 317 | discoid eczema – a type of eczema that occurs in circular or oval patches on the skin 318 | contact dermatitis – a type of eczema that occurs when the body comes into contact with a particular substance 319 | varicose eczema – a type of eczema that most often affects the lower legs and is caused by problems with the flow of blood through the leg veins 320 | seborrhoeic eczema – a type of eczema where red, scaly patches develop on the sides of the nose, eyebrows, ears and scalp 321 | dyshidrotic eczema (pompholyx) – a type of eczema that causes tiny blisters to erupt across the palms of the hands",https://www.nhs.uk/conditions/Eczema-(atopic)/Pages/Introduction.aspx,1075,part_0 322 | 6,"Overview 323 | HIV and AIDS 324 | HIV (human immunodeficiency virus) is a virus that damages the cells in your immune system and weakens your ability to fight everyday infections and disease. 
325 | AIDS (acquired immune deficiency syndrome) is the name used to describe a number of potentially life-threatening infections and illnesses that happen when your immune system has been severely damaged by the HIV virus. 326 | While AIDS cannot be transmitted from 1 person to another, the HIV virus can. 327 | There's currently no cure for HIV, but there are very effective drug treatments that enable most people with the virus to live a long and healthy life. 328 | With an early diagnosis and effective treatments, most people with HIV will not develop any AIDS-related illnesses and will live a near-normal lifespan. 329 | Symptoms of HIV infection 330 | Most people experience a short flu-like illness 2 to 6 weeks after HIV infection, which lasts for a week or 2. 331 | After these symptoms disappear, HIV may not cause any symptoms for many years, although the virus continues to damage your immune system. 332 | This means many people with HIV do not know they're infected. 333 | Anyone who thinks they could have HIV should get tested. 334 | Some people are advised to have regular tests as they're at particularly high risk. 335 | Read more about who's most at risk of HIV 336 | Causes of HIV infection 337 | HIV is found in the body fluids of an infected person. This includes semen, vaginal and anal fluids, blood and breast milk. 338 | It's a fragile virus and does not survive outside the body for long. 339 | HIV cannot be transmitted through sweat, urine or saliva. 340 | The most common way of getting HIV in the UK is through having anal or vaginal sex without a condom. 341 | Other ways of getting HIV include: 342 | sharing needles, syringes or other injecting equipment 343 | transmission from mother to baby during pregnancy, birth or breastfeeding 344 | The chance of getting HIV through oral sex is very low and will be dependent on many things, such as whether you receive or give oral sex and the oral hygiene of the person giving the oral sex. 
345 | Diagnosing HIV 346 | Seek medical advice as soon as possible if you think you might have been exposed to HIV. 347 | You can get tested in a number of places, including at a GP surgery, sexual health clinics and clinics run by charities. 348 | Find HIV testing services near you 349 | The only way to find out if you have HIV is to have an HIV test. This involves testing a sample of your blood or saliva for signs of the infection. 350 | It's important to be aware that: 351 | emergency anti-HIV medicine called post-exposure prophylaxis (PEP) may stop you becoming infected if started within 72 hours of possible exposure to the virus – it's recommended that you start it as soon as possible, ideally within 24 hours 352 | an early diagnosis means you can start treatment sooner, which can improve your chances of controlling the virus, reduce the risk of becoming more unwell and reduce the chance of passing the virus on to others 353 | Both positive and negative HIV tests may need to be repeated 1 to 3 months after potential exposure to HIV infection (this is known as the window period), but you should not wait this long to seek help: 354 | clinics may offer a finger prick blood test, which can give you a result in minutes, but it may take up to a few days to get the results of a more detailed HIV test 355 | home testing or home sampling kits are available to buy online or from pharmacies – depending on the type of test you use, your result will be available in a few minutes or a few days 356 | If your first test suggests you have HIV, a further blood test will need to be carried out to confirm the result. 357 | If this is positive, you'll be referred to a specialist HIV clinic for some more tests and a discussion about your treatment options. 358 | Treatment for HIV 359 | Antiretroviral medicines are used to treat HIV. They work by stopping the virus replicating in the body, allowing the immune system to repair itself and preventing further damage. 
360 | These come in the form of tablets, which need to be taken every day. 361 | HIV is able to develop resistance to a single HIV medicine very easily, but taking a combination of different medicines makes this much less likely. 362 | Most people with HIV take a combination of medicines. It's vital these are taken every day as recommended by your doctor. 363 | The goal of HIV treatment is to have an undetectable viral load. This means the level of HIV virus in your body is low enough to not be detected by a test. 364 | Living with HIV 365 | If you're living with HIV, taking effective HIV treatment and being undetectable significantly reduces your risk of passing HIV on to others. 366 | You'll also be encouraged to: 367 | take regular exercise 368 | eat a healthy diet 369 | stop smoking 370 | have yearly flu jabs to minimise the risk of getting serious illnesses 371 | Without treatment, the immune system will become severely damaged, and life-threatening illnesses such as cancer and severe infections can occur. 372 | If you're planning on getting pregnant, it's important to talk to a GP. Although rare, it's possible to transmit HIV to your baby. 373 | Preventing HIV 374 | Anyone who has sex without a condom or shares needles is at risk of HIV infection. 375 | There are many effective ways to prevent or reduce the risk of HIV infection, including: 376 | using a condom for sex 377 | post-exposure prophylaxis (PEP) 378 | pre-exposure prophylaxis (PrEP) 379 | treatment for HIV to reduce the viral load to undetectable 380 | if you use drugs, never sharing needles or other injecting equipment, including syringes, spoons and swabs 381 | Speak to your local sexual health clinic or a GP for further advice about the best way to reduce your risk. 382 | For people with HIV, if you have been taking effective HIV treatment and your viral load has been undetectable for 6 months or more, it means you cannot pass the virus on through sex. 
383 | This is called undetectable=untransmittable (U=U). 384 | Further information on U=U 385 | NAM aidsmap: undetectable equals untransmittable (U=U) consensus statement",https://www.nhs.uk/conditions/HIV/Pages/Introduction.aspx,1258,part_0 386 | 7,"Overview 387 | Heart attack 388 | A heart attack (myocardial infarction or MI) is a serious medical emergency in which the supply of blood to the heart is suddenly blocked, usually by a blood clot. 389 | A heart attack is a medical emergency. Call 999 and ask for an ambulance if you suspect a heart attack. 390 | A lack of blood to the heart may seriously damage the heart muscle and can be life threatening. 391 | Symptoms of a heart attack 392 | Symptoms of a heart attack can include: 393 | chest pain – a feeling of pressure, heaviness, tightness or squeezing across your chest 394 | pain in other parts of the body – it can feel as if the pain is spreading from your chest to your arms (usually the left arm, but it can affect both arms), jaw, neck, back and tummy 395 | feeling lightheaded or dizzy 396 | sweating 397 | shortness of breath 398 | feeling sick (nausea) or being sick (vomiting) 399 | an overwhelming feeling of anxiety (similar to a panic attack) 400 | coughing or wheezing 401 | The chest pain is often severe, but some people may only experience minor pain, similar to indigestion. 402 | While the most common symptom in both men and women is chest pain, women are more likely to have other symptoms such as shortness of breath, feeling or being sick and back or jaw pain. 403 | Call 999 immediately if you think someone might be having a heart attack. The faster you act, the better their chances. 404 | Treating heart attacks 405 | While waiting for an ambulance, it may help to chew and then swallow a tablet of aspirin (ideally 300mg), as long as the person having a heart attack is not allergic to aspirin. 406 | Aspirin helps to thin the blood and improves blood flow to the heart. 
407 | In hospital, treatment for a heart attack depends on how serious it is. 408 | The 2 main treatments are: 409 | using medicines to dissolve blood clots 410 | surgery to help restore blood to the heart 411 | Causes of a heart attack 412 | Coronary heart disease (CHD) is the leading cause of heart attacks. 413 | CHD is a condition in which the major blood vessels that supply the heart get clogged with deposits of cholesterol, known as plaques. 414 | Before a heart attack, 1 of the plaques bursts (ruptures), causing a blood clot to develop at the site of the rupture. 415 | The clot may block the supply of blood to the heart, triggering a heart attack. 416 | Recovering from a heart attack 417 | The time it takes to recover from a heart attack will depend on the amount of damage to your heart muscle. 418 | Most people can return to work after having a heart attack. Some people are well enough to return to work after 2 weeks. Other people may take several months to recover. How quickly you can go back to work depends on your health, the state of your heart and the type of work you do. 419 | The recovery process aims to: 420 | reduce your risk of another heart attack through a combination of lifestyle changes (such as eating a healthy diet), and medicines (such as statins), which help to lower blood cholesterol levels 421 | gradually restore your physical fitness so you can resume normal activities (cardiac rehabilitation) 422 | Find out more about recovering from a heart attack 423 | Complications of a heart attack 424 | Complications of a heart attack can be serious and possibly life threatening. 425 | These include: 426 | arrhythmias – these are abnormal heartbeats. 
1 type is where the heart begins beating faster and faster, then stops beating (cardiac arrest) 427 | cardiogenic shock – where the heart's muscles are severely damaged and can no longer contract properly to supply enough blood to maintain many body functions 428 | heart rupture – where the heart's muscles, walls or valves split apart (rupture) 429 | These complications can happen quickly after a heart attack and are a leading cause of death. 430 | Many people die suddenly from a complication of a heart attack before reaching hospital or within the 1st month after a heart attack. 431 | The outlook often depends on: 432 | age – serious complications are more likely as you get older 433 | the severity of the heart attack – how much of the heart's muscle has been damaged during the attack 434 | how long it took before a person received treatment – treatment for a heart attack should begin as soon as possible 435 | Find out more about complications of a heart attack 436 | Preventing a heart attack 437 | There are 5 main steps you can take to reduce your risk of having a heart attack (or having another heart attack): 438 | smokers should quit smoking 439 | lose weight if you're overweight or obese 440 | do regular exercise – adults should do at least 150 minutes (2 hours and 30 minutes) of moderate-intensity aerobic activity each week, unless advised otherwise by the doctor in charge of your care 441 | eat a low-fat, high-fibre diet, including wholegrains and at least 5 portions of fruit and vegetables a day 442 | moderate your alcohol consumption 443 | Video: heart attack 444 | This video explores the symptoms, surgical treatments and importance of reducing risk factors for a heart attack.",https://www.nhs.uk/conditions/Heart-attack/Pages/Introduction.aspx,1037,part_0 445 | 8,"Laryngitis 446 | Laryngitis is when your voice box or vocal cords in the throat become irritated or swollen. It usually goes away by itself within 1 to 2 weeks. 
447 | Check if you have laryngitis 448 | Laryngitis usually comes on suddenly and gets worse during the first 3 days. 449 | The main symptoms are: 450 | a hoarse (croaky) voice 451 | sometimes losing your voice 452 | an irritating cough that does not go away 453 | always needing to clear your throat 454 | a sore throat 455 | Children can also: 456 | have a temperature of 38C or above 457 | be off their food or drink 458 | have difficulty breathing (but this is rare) 459 | Laryngitis is often linked to other illnesses, such as colds and flu, so you may also have other symptoms. 460 | If you're not sure it's laryngitis, check other sore throat symptoms. 461 | How you can treat laryngitis yourself 462 | Laryngitis usually goes away on its own after 1 to 2 weeks and you do not need to see a GP. 463 | Do 464 | try to speak as little as possible 465 | drink plenty of fluids 466 | keep the air moist by putting out bowls of water – central heating and air conditioning make the air dry 467 | gargle with warm salty water (children should not try this) 468 | Don’t 469 | do not talk loudly or whisper – both strain your voice 470 | do not smoke 471 | do not spend time in smoky or dusty places 472 | do not drink too much caffeine or alcohol – they cause dehydration 473 | How to gargle with salty water 474 | Dissolve half a teaspoon of salt in a glass of warm water. Warm water helps salt dissolve. 475 | Gargle with the solution then spit it out. Do not swallow it. 476 | Repeat as often as you like. 477 | This is not suitable for younger children. 478 | A pharmacist can help with laryngitis 479 | Speak to a pharmacist about your sore throat. 
480 | They can give advice and suggest treatments, including: 481 | paracetamol or ibuprofen 482 | cough syrup to help with your cough 483 | solutions to gargle or lozenges for the pain 484 | Find a pharmacy 485 | Non-urgent advice: 486 | See a GP if: 487 | your symptoms do not improve after 2 weeks 488 | it's very painful or it's difficult to swallow 489 | you keep getting laryngitis or voice problems 490 | What happens at your appointment 491 | The GP will try to work out what has caused your laryngitis. 492 | They may: 493 | look inside your throat using a small mirror 494 | wipe a cotton bud around the back of your throat for testing 495 | arrange a blood test 496 | refer you to an ear, nose and throat (ENT) specialist (if you keep getting laryngitis) 497 | If your laryngitis is caused by an infection, the GP might prescribe antibiotics. 498 | Immediate action required: 499 | Call 999 or go to A&E if: 500 | you or your child are having difficulty breathing 501 | Find your nearest A&E 502 | What causes laryngitis 503 | Laryngitis usually happens when you have an infection from a virus, such as cold or flu. A flu vaccination will help prevent you getting flu. 504 | Other things that cause laryngitis include: 505 | allergies to things like dust and fumes 506 | acid from your stomach coming up your throat (acid reflux) 507 | coughing over a long time 508 | clearing your throat all the time",https://www.nhs.uk/conditions/Laryngitis/,685,part_0 509 | 9,"Overview 510 | Multiple sclerosis 511 | Multiple sclerosis (MS) is a condition that can affect the brain and spinal cord, causing a wide range of potential symptoms, including problems with vision, arm or leg movement, sensation or balance. 512 | It's a lifelong condition that can sometimes cause serious disability, although it can occasionally be mild. 513 | In many cases, it's possible to treat symptoms. Average life expectancy is slightly reduced for people with MS. 
514 | It's most commonly diagnosed in people in their 20s, 30s and 40s although it can develop at any age. It's about 2 to 3 times more common in women than men. 515 | MS is one of the most common causes of disability in younger adults. 516 | Symptoms of multiple sclerosis 517 | The symptoms of MS vary widely from person to person and can affect any part of the body. 518 | The main symptoms include: 519 | fatigue 520 | difficulty walking 521 | vision problems, such as blurred vision 522 | problems controlling the bladder 523 | numbness or tingling in different parts of the body 524 | muscle stiffness and spasms 525 | problems with balance and co-ordination 526 | problems with thinking, learning and planning 527 | Depending on the type of MS you have, your symptoms may come and go in phases or get steadily worse over time (progress). 528 | Getting medical advice 529 | See a GP if you're worried you might have signs of MS. 530 | The symptoms often have many other causes, so they're not necessarily a sign of MS. 531 | Let the GP know about the specific pattern of symptoms you're experiencing. 532 | If they think you could have MS, you'll be referred to a specialist in conditions of the nervous system (a neurologist), who may suggest tests such as an MRI scan to check for features of MS. 533 | Find out more about diagnosing MS 534 | Types of multiple sclerosis 535 | MS starts in 1 of 2 general ways: with individual relapses (attacks or exacerbations) or with gradual progression. 536 | Relapsing remitting MS 537 | Between 8 and 9 of every 10 people with MS are diagnosed with the relapsing remitting type. 538 | Someone with relapsing remitting MS will have episodes of new or worsening symptoms, known as relapses. 539 | These typically worsen over a few days, last for days to weeks to months, then slowly improve over a similar time period. 540 | Relapses often occur without warning, but are sometimes associated with a period of illness or stress. 
541 | The symptoms of a relapse may disappear altogether, with or without treatment, although some symptoms often persist, with repeated attacks happening over several years. 542 | Periods between attacks are known as periods of remission. These can last for years at a time. 543 | After many years (usually decades), many, but not all, people with relapsing remitting MS go on to develop secondary progressive MS. 544 | In this type of MS, symptoms gradually worsen over time without obvious attacks. Some people continue to have infrequent relapses during this stage. 545 | About two-thirds of people with relapsing remitting MS will develop secondary progressive MS. 546 | Primary progressive MS 547 | Between 1 and 2 in every 10 people with the condition start their MS with a gradual worsening of symptoms. 548 | In primary progressive MS, symptoms gradually worsen and accumulate over several years, and there are no periods of remission, though people often have periods where their condition appears to stabilise. 549 | What causes multiple sclerosis? 550 | MS is an autoimmune condition. This is when something goes wrong with the immune system and it mistakenly attacks a healthy part of the body – in this case, the brain or spinal cord of the nervous system. 551 | In MS, the immune system attacks the layer that surrounds and protects the nerves called the myelin sheath. 552 | This damages and scars the sheath, and potentially the underlying nerves, meaning that messages travelling along the nerves become slowed or disrupted. 553 | Exactly what causes the immune system to act in this way is unclear, but most experts think a combination of genetic and environmental factors is involved. 554 | Treatments for multiple sclerosis 555 | There's currently no cure for MS, but a number of treatments can help control the condition and ease symptoms. 556 | The treatment you need will depend on the specific symptoms and difficulties you have. 
557 | It may include: 558 | treating relapses with short courses of steroid medicine to speed up recovery 559 | specific treatments for individual MS symptoms 560 | treatment to reduce the number of relapses using medicines called disease-modifying therapies 561 | Disease-modifying therapies may also help to slow or reduce the overall worsening of disability in people with a type of MS called relapsing remitting MS, and in some people with types called primary and secondary progressive MS, who have relapses. 562 | Unfortunately, there's currently no treatment that can slow the progress of primary progressive MS, or secondary progressive MS, where there are no relapses. 563 | Many therapies aiming to treat progressive MS are currently being researched. 564 | Living with multiple sclerosis 565 | If you have been diagnosed with MS, it's important to take care of your general health. 566 | Read more advice about living with MS 567 | Outlook 568 | MS can be a challenging condition to live with, but new treatments over the past 20 years have considerably improved the quality of life of people with the condition. 569 | MS itself is rarely fatal, but complications may arise from severe MS, such as chest or bladder infections, or swallowing difficulties. 570 | The average life expectancy for people with MS is around 5 to 10 years lower than average, and this gap appears to be getting smaller all the time. 571 | Charities and support groups for multiple sclerosis 572 | There are 2 main MS charities in the UK: 573 | MS Society 574 | MS Trust 575 | These organisations offer useful advice, publications, news items about ongoing research, blogs and chatrooms. 576 | They can be very useful if you, or someone you know, has just been diagnosed with MS. 577 | There's also the shift.ms website, an online community for younger people affected by MS. 
578 | Information: 579 | Social care and support guide 580 | The social care and support guide explains your options and where you can get support if you: 581 | need help with day-to-day living because of illness or disability 582 | care for someone regularly because they're ill, elderly or disabled, including family members",https://www.nhs.uk/conditions/Multiple-sclerosis,1241,part_0 583 | -------------------------------------------------------------------------------- /data/nhs_conditions_small_sample/original_data.csv: -------------------------------------------------------------------------------- 1 | text,url 2 | "Overview 3 | High blood pressure (hypertension) 4 | High blood pressure, or hypertension, rarely has noticeable symptoms. But if untreated, it increases your risk of serious problems such as heart attacks and strokes. 5 | Around a third of adults in the UK have high blood pressure, although many will not realise it. 6 | The only way to find out if your blood pressure is high is to have your blood pressure checked. 7 | What is high blood pressure? 8 | Blood pressure is recorded with 2 numbers. The systolic pressure (higher number) is the force at which your heart pumps blood around your body. 9 | The diastolic pressure (lower number) is the resistance to the blood flow in the blood vessels. 10 | They're both measured in millimetres of mercury (mmHg). 11 | As a general guide: 12 | high blood pressure is considered to be from 140/90mmHg (or an average of 135/85mmHg at home) – or 150/90mmHg (or an average of 145/85mmHg at home) if you're over the age of 80 13 | ideal blood pressure is usually considered to be between 90/60mmHg and 120/80mmHg, while the target for over-80s is below 150/90mmHg (or 145/85mmHg at home) 14 | Blood pressure readings between 120/80mmHg and 140/90mmHg could mean you're at risk of developing high blood pressure if you do not take steps to keep your blood pressure under control. 
15 | Everyone's blood pressure will be slightly different. What's considered low or high for you may be normal for someone else. 16 | Risks of high blood pressure 17 | If your blood pressure is too high, it puts extra strain on your blood vessels, heart and other organs, such as the brain, kidneys and eyes. 18 | Persistent high blood pressure can increase your risk of a number of serious and potentially life-threatening health conditions, such as: 19 | heart disease 20 | heart attacks 21 | strokes 22 | heart failure 23 | peripheral arterial disease 24 | aortic aneurysms 25 | kidney disease 26 | vascular dementia 27 | If you have high blood pressure, reducing it even a small amount can help lower your risk of these health conditions. 28 | Check your blood pressure 29 | The only way of knowing whether you have high blood pressure is to have a blood pressure test. 30 | All adults over 40 are advised to have their blood pressure checked at least every 5 years. 31 | Getting this done is easy and could save your life. 32 | You can get your blood pressure tested at a number of places, including: 33 | at your GP surgery 34 | at some pharmacies 35 | as part of your NHS Health Check 36 | in some workplaces 37 | You can also check your blood pressure yourself with a home blood pressure monitor. 38 | Find out more about getting a blood pressure test 39 | Things that can increase your risk of getting high blood pressure 40 | It's not always clear what causes high blood pressure, but there are things that can increase your risk. 
41 | You might be more at risk if you: 42 | are overweight 43 | eat too much salt and do not eat enough fruit and vegetables 44 | do not do enough exercise 45 | drink too much alcohol or coffee (or other caffeine-based drinks) 46 | smoke 47 | do not get much sleep or have disturbed sleep 48 | are over 65 49 | have a relative with high blood pressure 50 | are of black African or black Caribbean descent 51 | live in a deprived area 52 | Making healthy lifestyle changes can sometimes help reduce your chances of getting high blood pressure and help lower your blood pressure if it's already high. 53 | Treatment for high blood pressure 54 | Doctors can help you keep your blood pressure to a safe level using: 55 | lifestyle changes 56 | medicines 57 | What works best is different for each person. 58 | Talk to your doctor to help you decide about treatment. 59 | This patient decision aid (PDF, 132kb) can also help you to understand your treatment options. 60 | Lifestyle changes to reduce blood pressure 61 | These lifestyle changes can help prevent and lower high blood pressure: 62 | reduce the amount of salt you eat and have a generally healthy diet 63 | cut back on alcohol 64 | lose weight if you're overweight 65 | exercise regularly 66 | cut down on caffeine 67 | stop smoking 68 | Some people with high blood pressure may also need to take 1 or more medicines to stop their blood pressure getting too high. 69 | Medicines for high blood pressure 70 | If you're diagnosed with high blood pressure, your doctor may recommend taking 1 or more medicines to keep it under control. 71 | These come as tablets and usually need to be taken once a day. 
72 | Common blood pressure medicines include: 73 | ACE inhibitors – such as enalapril, lisinopril, perindopril and ramipril 74 | angiotensin-2 receptor blockers (ARBs) – such as candesartan, irbesartan, losartan, valsartan and olmesartan 75 | calcium channel blockers – such as amlodipine, felodipine and nifedipine or diltiazem and verapamil 76 | diuretics – such as indapamide and bendroflumethiazide 77 | beta blockers – such as atenolol and bisoprolol 78 | alpha blockers – such as doxazosin 79 | other diuretics – such as amiloride and spironolactone 80 | The medicine recommended for you will depend on things like how high your blood pressure is, your age and your ethnicity.",https://www.nhs.uk/conditions/Blood-pressure-(high)/Pages/Introduction.aspx 81 | "Bronchiolitis 82 | Bronchiolitis is a common chest infection that affects babies and children under 2. It's usually mild and can be treated at home, but it can be serious. 83 | Bronchiolitis is different from bronchitis, which causes a cough with lots of mucus and can affect people of all ages. 84 | Check if it's bronchiolitis 85 | The early symptoms of bronchiolitis are similar to a cold, such as sneezing, a runny or blocked nose, a cough and a slightly high temperature of 38C. 86 | A child with bronchiolitis may then get other symptoms, such as: 87 | breathing more quickly 88 | finding it difficult to feed or eat 89 | noisy breathing (wheezing) 90 | becoming irritable 91 | Symptoms are usually worst between days 3 and 5, and the cough usually gets better in 3 weeks. 92 | Immediate action required: 93 | Call 999 or go to A&E if: 94 | your child is having difficulty breathing – you may notice grunting noises or their tummy sucking under their ribs 95 | there are pauses when your child breathes 96 | your child's skin, tongue or lips are blue 97 | your child is floppy and will not wake up or stay awake 98 | As a parent, you may know if your child seems seriously unwell and should trust your own judgement. 
99 | Find your nearest A&E 100 | Urgent advice: 101 | Ask for an urgent GP appointment or call 111 if: 102 | your child has had a cold and it's getting worse 103 | your child is feeding or eating much less than normal 104 | your child has had a dry nappy for 12 hours or more, or shows other signs of dehydration 105 | your baby is under 3 months and has a temperature of 38C, or is older than 3 months and has a temperature of 39C or higher 106 | your baby feels hotter than usual when you touch their back or chest, or feels sweaty 107 | your child is very tired or irritable 108 | Treatments for bronchiolitis 109 | There's no specific treatment for bronchiolitis. It usually gets better on its own and you can look after your child at home. 110 | But it can be serious in some children, who may need to be treated in hospital. 111 | Do 112 | give children's paracetamol to babies and children over 2 months old or ibuprofen to babies and children over 3 months old – but do not give aspirin to a child under 16 113 | try using salt water (saline) drops if your child's nose is blocked 114 | keep your child upright as much as possible when they're awake – this will help them breathe more easily 115 | encourage your child to drink lots of fluids – try smaller feeds more often in babies, and give older children extra water or diluted fruit juice 116 | Don’t 117 | do not smoke around your child 118 | do not try to lower your child's temperature by sponging them with cool water or taking off all their clothes 119 | Preventing bronchiolitis 120 | There are some things you can do to lower the chances of your child getting bronchiolitis or spreading the viruses that cause it, such as: 121 | wash your hands and your child's hands often 122 | wash or wipe down toys and clean surfaces regularly 123 | use disposable tissues and throw them away as soon as you've used them 124 | keep newborn babies away from anyone with a cold or the flu – especially if they're under 2 months old or were 
premature 125 | It's also important not to smoke around your child. Children who breathe in cigarette smoke have a higher risk of getting bronchiolitis. 126 | Children at risk of severe bronchiolitis 127 | Some children may have a higher risk of getting seriously ill with bronchiolitis. 128 | This includes children who: 129 | were born very prematurely 130 | have a heart or lung condition 131 | have a weakened immune system 132 | These children may be able to have treatment in the winter (between October and March) to stop them getting severe bronchiolitis. 133 | Causes of bronchiolitis 134 | Bronchiolitis is caused by a viral infection, usually the respiratory syncytial virus (RSV). 135 | RSV is very common and spreads easily in coughs and sneezes. Almost all children have had it by the time they're 2. 136 | In older children and adults, RSV may cause a cough or cold, but in young children it can cause bronchiolitis.",https://www.nhs.uk/conditions/Bronchiolitis/ 137 | "Bronchitis 138 | Bronchitis is inflammation of the airways in the lungs that is usually caused by an infection. It often gets better without treatment in around 3 weeks. 139 | Some people have long-term inflammation of the airways in the lungs called chronic bronchitis. This is known as chronic obstructive pulmonary disease (COPD). 140 | Check if you have bronchitis 141 | Symptoms of bronchitis can be similar to a cold or flu. 142 | Symptoms include: 143 | a cough – you may cough up clear, white, yellow or green mucus 144 | chest pain when coughing 145 | shortness of breath 146 | a sore throat 147 | a runny nose 148 | a high temperature 149 | Things you can do to help with bronchitis 150 | There are some things you can do to ease the symptoms of bronchitis and reduce the risk of spreading infections to other people. 
151 | Do 152 | get plenty of rest – try to stay at home and avoid contact with other people if you have a high temperature or do not feel well enough to do your normal activities 153 | drink plenty of fluids 154 | take painkillers like paracetamol or ibuprofen to help with pain and bring down a high temperature 155 | try adding honey to a warm drink to help soothe your throat (do not give honey to babies under 1) 156 | cover your mouth and nose with a tissue when you cough or sneeze – put used tissues in the bin as quickly as possible 157 | wash your hands regularly with water and soap 158 | Don’t 159 | do not smoke 160 | Urgent advice: 161 | Ask for an urgent GP appointment or get help from NHS 111 if: 162 | you've had a cough for more than 3 weeks 163 | you cough up blood or blood-stained mucus 164 | you have chest pain that comes and goes, or when breathing or coughing 165 | you're over 65 166 | you're pregnant 167 | you have a long-term condition, such as diabetes, or a heart, lung or kidney condition 168 | you have a weakened immune system – for example, you have a condition that affects the immune system, or you're having chemotherapy 169 | you feel very unwell 170 | Immediate action required: 171 | Call 999 if: 172 | you are struggling to breathe – you are choking, gasping and unable to speak 173 | you have pale, blue or blotchy skin, lips or tongue – on brown or black skin, this may be easier to see on the lips, tongue or gums, under the nails or around the eyes 174 | you suddenly feel confused – for example, you do not know where you are 175 | you're unable to wake your baby or they feel floppy 176 | Treatments for bronchitis 177 | Bronchitis usually clears up without treatment in around 3 weeks. See a GP if your symptoms last longer than 3 weeks. 
178 | You may need antibiotics if your bronchitis is caused by a bacterial infection.",https://www.nhs.uk/conditions/Bronchitis/ 179 | "Steroids 180 | Steroids, also called corticosteroids, are anti-inflammatory medicines used to treat a range of conditions. 181 | They're different from anabolic steroids, which are often used illegally by some people to increase their muscle mass. 182 | Types of steroids 183 | Steroids come in many different forms. 184 | The main types are: 185 | tablets, syrups and liquids – such as prednisolone 186 | inhalers – such as beclometasone and fluticasone 187 | nasal sprays – such as beclometasone and fluticasone 188 | injections (given into joints, muscles or blood vessels) – such as methylprednisolone 189 | creams, lotions and gels – such as hydrocortisone skin cream 190 | Most steroids are only available on prescription, but a few (such as some creams or nasal sprays) can be bought from pharmacies and shops. 191 | Side effects of steroids 192 | Steroids do not tend to cause significant side effects if they're taken for a short time or at a low dose. 193 | But sometimes they can cause unpleasant side effects, such as an increased appetite, mood changes and difficulty sleeping. This is most common with steroid tablets. 194 | The side effects will usually pass once you finish the treatment, but do not stop taking your medicine without speaking to your doctor. Stopping a prescribed course of medicine can cause further unpleasant side effects (withdrawal symptoms). 195 | Read more about: 196 | side effects of steroid tablets 197 | side effects of steroid inhalers 198 | side effects of steroid nasal sprays 199 | side effects of steroid injections 200 | side effects of steroid creams 201 | You can report any suspected side effect to the Yellow Card Scheme. 
202 | Uses for steroids 203 | Steroids can be used to treat a wide range of conditions, including: 204 | asthma and chronic obstructive pulmonary disease (COPD) 205 | hay fever 206 | hives and eczema 207 | painful joints or muscles – such as arthritis, tennis elbow and frozen shoulder 208 | pain caused by an irritated or trapped nerve – such as sciatica 209 | inflammatory bowel disease – such as Crohn's disease 210 | lupus 211 | multiple sclerosis (MS) 212 | How steroids work 213 | Steroids are a man-made version of hormones normally produced by the adrenal glands which are 2 small glands found above the kidneys. 214 | When taken in doses higher than the amount your body normally produces, steroids reduce redness and swelling (inflammation). This can help with inflammatory conditions such as asthma and eczema. 215 | Steroids also reduce the activity of the immune system, which is the body's natural defence against illness and infection. 216 | This can help treat autoimmune conditions, such as rheumatoid arthritis or lupus, which are caused by the immune system mistakenly attacking the body.",https://www.nhs.uk/conditions/Corticosteroid-(drugs)/Pages/Introduction.aspx 217 | "Overview 218 | Creutzfeldt-Jakob disease 219 | Creutzfeldt-Jakob disease (CJD) is a rare and fatal condition that affects the brain. It causes brain damage that worsens rapidly over time. 220 | Symptoms of CJD 221 | Symptoms of CJD include: 222 | loss of intellect and memory 223 | changes in personality 224 | loss of balance and co-ordination 225 | slurred speech 226 | vision problems and blindness 227 | abnormal jerking movements 228 | progressive loss of brain function and mobility 229 | Most people with CJD will die within a year of the symptoms starting, usually from infection. 230 | This is because the immobility caused by CJD can make people with the condition vulnerable to infection. 231 | Read more about the symptoms of Creutzfeldt-Jakob disease and diagnosing Creutzfeldt-Jakob disease. 
232 | What causes CJD? 233 | CJD appears to be caused by an abnormal infectious protein called a prion. These prions accumulate at high levels in the brain and cause irreversible damage to nerve cells. 234 | While the abnormal prions are technically infectious, they're very different from viruses and bacteria. 235 | For example, prions aren't destroyed by the extremes of heat and radiation used to kill bacteria and viruses, and antibiotics or antiviral medicines have no effect on them. 236 | Read more about the causes of Creutzfeldt-Jakob disease. 237 | Types of CJD 238 | There are 4 main types of CJD. 239 | Sporadic CJD 240 | Sporadic CJD is the most common type. 241 | The precise cause of sporadic CJD is unclear, but it's been suggested that a normal brain protein changes abnormally (""misfolds"") and turns into a prion. 242 | Most cases of sporadic CJD occur in adults aged between 45 and 75. On average, symptoms develop between the ages of 60 and 65. 243 | Despite being the most common type of CJD, sporadic CJD is still very rare, affecting only 1 or 2 people in every million each year in the UK. 244 | In 2020, there were 131 recorded deaths from sporadic CJD in the UK. 245 | Variant CJD 246 | Variant CJD (vCJD) is likely to be caused by consuming meat from a cow that had bovine spongiform encephalopathy (BSE, or ""mad cow"" disease), a similar prion disease to CJD. 247 | Since the link between variant CJD and BSE was discovered in 1996, strict controls have proved very effective in preventing meat from infected cattle entering the food chain. 248 | See preventing Creutzfeldt-Jakob disease for more information. 249 | But the average time it takes for the symptoms of variant CJD to occur after initial infection (the incubation period) is still unclear. 250 | The incubation period could be very long (more than 10 years) in some people, so those exposed to infected meat before the food controls were introduced can still develop variant CJD. 
251 | The prion that causes variant CJD can also be transmitted by blood transfusion, although this has only happened 5 times in the UK. 252 | In 2020, there were no recorded deaths from variant CJD in the UK. 253 | Familial or inherited CJD 254 | Familial CJD is a very rare genetic condition where one of the genes a person inherits from their parent (the prion protein gene) carries a mutation that causes prions to form in their brain during adulthood, triggering the symptoms of CJD. 255 | It affects about 1 in every 9 million people in the UK. 256 | The symptoms of familial CJD usually first develop in people when they're in their early 50s. 257 | In 2020, there were 6 deaths from familial CJD and similar inherited prion diseases in the UK. 258 | Iatrogenic CJD 259 | Iatrogenic CJD is where the infection is accidentally spread from someone with CJD through medical or surgical treatment. 260 | For example, a common cause of iatrogenic CJD in the past was growth hormone treatment using human pituitary growth hormones extracted from deceased individuals, some of whom were infected with CJD. 261 | Synthetic versions of human growth hormone have been used since 1985, so this is no longer a risk. 262 | Iatrogenic CJD can also occur if instruments used during brain surgery on a person with CJD aren't properly cleaned between each surgical procedure and are reused on another person. 263 | But increased awareness of these risks means iatrogenic CJD is now very rare. 264 | In 2020, there was 1 death from iatrogenic CJD in the UK caused by receiving human growth hormone before 1985. 265 | How CJD is treated 266 | There's currently no cure for CJD, so treatment aims to relieve symptoms and make the affected person feel as comfortable as possible. 267 | This can include using medicine such as antidepressants to help with anxiety and depression, and painkillers to relieve pain. 268 | Some people will need nursing care and assistance with feeding. 
269 | Read more about treating Creutzfeldt-Jakob disease. 270 | Variant CJD compensation scheme 271 | In October 2001, the government announced a compensation scheme for UK victims of variant CJD. 272 | The vCJD Trust assesses claims and pays compensation to victims and their families.",https://www.nhs.uk/conditions/Creutzfeldt-Jakob-disease/Pages/Introduction.aspx 273 | "Overview 274 | Atopic eczema 275 | Atopic eczema (atopic dermatitis) is the most common form of eczema, a condition that causes the skin to become itchy, dry and cracked. 276 | Atopic eczema is more common in children, often developing before their first birthday. But it may also develop for the first time in adults. 277 | It's usually a long-term (chronic) condition, although it can improve significantly, or even clear completely, in some children as they get older. 278 | Symptoms of atopic eczema 279 | Atopic eczema causes the skin to become itchy, dry, cracked and sore. 280 | Some people only have small patches of dry skin, but others may experience widespread inflamed skin all over the body. 281 | Inflamed skin can become red on lighter skin, and darker brown, purple or grey on darker skin. This can also be more difficult to see on darker skin. 282 | Although atopic eczema can affect any part of the body, it most often affects the hands, insides of the elbows, backs of the knees and the face and scalp in children. 283 | People with atopic eczema usually have periods when symptoms are less noticeable, as well as periods when symptoms become more severe (flare-ups). 284 | When to seek medical advice 285 | See a GP if you have symptoms of atopic eczema. 
They'll usually be able to diagnose atopic eczema by looking at your skin and asking questions, such as: 286 | whether the rash is itchy and where it appears 287 | when the symptoms first began 288 | whether it comes and goes over time 289 | whether there's a history of atopic eczema in your family 290 | whether you have any other conditions, such as allergies or asthma 291 | whether something in your diet or lifestyle may be contributing to your symptoms 292 | Typically, to be diagnosed with atopic eczema you should have had an itchy skin condition in the last 12 months and 3 or more of the following: 293 | visibly irritated red skin in the creases of your skin – such as the insides of your elbows or behind your knees (or on the cheeks, outsides of elbows, or fronts of the knees in children aged 18 months or under) at the time of examination by a health professional 294 | a history of skin irritation occurring in the same areas mentioned above 295 | generally dry skin in the last 12 months 296 | a history of asthma or hay fever – children under 4 must have an immediate relative, such as a parent, brother or sister, who has 1 of these conditions 297 | the condition started before the age of 2 (this does not apply to children under the age of 4) 298 | Causes of atopic eczema 299 | The exact cause of atopic eczema is unknown, but it's clear it is not down to one single thing. 300 | Atopic eczema often occurs in people who get allergies. ""Atopic"" means sensitivity to allergens. 301 | It can run in families, and often develops alongside other conditions, such as asthma and hay fever. 302 | The symptoms of atopic eczema often have certain triggers, such as soaps, detergents, stress and the weather. 303 | Sometimes food allergies can play a part, especially in young children with severe eczema. 304 | You may be asked to keep a food diary to try to determine whether a specific food makes your symptoms worse. 
305 | Allergy tests are not usually needed, although they're sometimes helpful in identifying whether a food allergy may be triggering symptoms. 306 | Treating atopic eczema 307 | Treatment for atopic eczema can help to relieve the symptoms and many cases improve over time. 308 | But there's currently no cure and severe eczema often has a significant impact on daily life, which may be difficult to cope with physically and mentally. 309 | There's also an increased risk of skin infections. 310 | Many different treatments can be used to control symptoms and manage eczema, including: 311 | self-care techniques, such as reducing scratching and avoiding triggers 312 | emollients (moisturising treatments) – used on a daily basis for dry skin 313 | topical corticosteroids – used to reduce swelling, redness and itching during flare-ups 314 | Other types of eczema 315 | Eczema is the name for a group of skin conditions that cause dry, irritated skin. 316 | Other types of eczema include: 317 | discoid eczema – a type of eczema that occurs in circular or oval patches on the skin 318 | contact dermatitis – a type of eczema that occurs when the body comes into contact with a particular substance 319 | varicose eczema – a type of eczema that most often affects the lower legs and is caused by problems with the flow of blood through the leg veins 320 | seborrhoeic eczema – a type of eczema where red, scaly patches develop on the sides of the nose, eyebrows, ears and scalp 321 | dyshidrotic eczema (pompholyx) – a type of eczema that causes tiny blisters to erupt across the palms of the hands",https://www.nhs.uk/conditions/Eczema-(atopic)/Pages/Introduction.aspx 322 | "Overview 323 | HIV and AIDS 324 | HIV (human immunodeficiency virus) is a virus that damages the cells in your immune system and weakens your ability to fight everyday infections and disease. 
325 | AIDS (acquired immune deficiency syndrome) is the name used to describe a number of potentially life-threatening infections and illnesses that happen when your immune system has been severely damaged by the HIV virus. 326 | While AIDS cannot be transmitted from 1 person to another, the HIV virus can. 327 | There's currently no cure for HIV, but there are very effective drug treatments that enable most people with the virus to live a long and healthy life. 328 | With an early diagnosis and effective treatments, most people with HIV will not develop any AIDS-related illnesses and will live a near-normal lifespan. 329 | Symptoms of HIV infection 330 | Most people experience a short flu-like illness 2 to 6 weeks after HIV infection, which lasts for a week or 2. 331 | After these symptoms disappear, HIV may not cause any symptoms for many years, although the virus continues to damage your immune system. 332 | This means many people with HIV do not know they're infected. 333 | Anyone who thinks they could have HIV should get tested. 334 | Some people are advised to have regular tests as they're at particularly high risk. 335 | Read more about who's most at risk of HIV 336 | Causes of HIV infection 337 | HIV is found in the body fluids of an infected person. This includes semen, vaginal and anal fluids, blood and breast milk. 338 | It's a fragile virus and does not survive outside the body for long. 339 | HIV cannot be transmitted through sweat, urine or saliva. 340 | The most common way of getting HIV in the UK is through having anal or vaginal sex without a condom. 341 | Other ways of getting HIV include: 342 | sharing needles, syringes or other injecting equipment 343 | transmission from mother to baby during pregnancy, birth or breastfeeding 344 | The chance of getting HIV through oral sex is very low and will be dependent on many things, such as whether you receive or give oral sex and the oral hygiene of the person giving the oral sex. 
345 | Diagnosing HIV 346 | Seek medical advice as soon as possible if you think you might have been exposed to HIV. 347 | You can get tested in a number of places, including at a GP surgery, sexual health clinics and clinics run by charities. 348 | Find HIV testing services near you 349 | The only way to find out if you have HIV is to have an HIV test. This involves testing a sample of your blood or saliva for signs of the infection. 350 | It's important to be aware that: 351 | emergency anti-HIV medicine called post-exposure prophylaxis (PEP) may stop you becoming infected if started within 72 hours of possible exposure to the virus – it's recommended that you start it as soon as possible, ideally within 24 hours 352 | an early diagnosis means you can start treatment sooner, which can improve your chances of controlling the virus, reduce the risk of becoming more unwell and reduce the chance of passing the virus on to others 353 | Both positive and negative HIV tests may need to be repeated 1 to 3 months after potential exposure to HIV infection (this is known as the window period), but you should not wait this long to seek help: 354 | clinics may offer a finger prick blood test, which can give you a result in minutes, but it may take up to a few days to get the results of a more detailed HIV test 355 | home testing or home sampling kits are available to buy online or from pharmacies – depending on the type of test you use, your result will be available in a few minutes or a few days 356 | If your first test suggests you have HIV, a further blood test will need to be carried out to confirm the result. 357 | If this is positive, you'll be referred to a specialist HIV clinic for some more tests and a discussion about your treatment options. 358 | Treatment for HIV 359 | Antiretroviral medicines are used to treat HIV. They work by stopping the virus replicating in the body, allowing the immune system to repair itself and preventing further damage. 
360 | These come in the form of tablets, which need to be taken every day. 361 | HIV is able to develop resistance to a single HIV medicine very easily, but taking a combination of different medicines makes this much less likely. 362 | Most people with HIV take a combination of medicines. It's vital these are taken every day as recommended by your doctor. 363 | The goal of HIV treatment is to have an undetectable viral load. This means the level of HIV virus in your body is low enough to not be detected by a test. 364 | Living with HIV 365 | If you're living with HIV, taking effective HIV treatment and being undetectable significantly reduces your risk of passing HIV on to others. 366 | You'll also be encouraged to: 367 | take regular exercise 368 | eat a healthy diet 369 | stop smoking 370 | have yearly flu jabs to minimise the risk of getting serious illnesses 371 | Without treatment, the immune system will become severely damaged, and life-threatening illnesses such as cancer and severe infections can occur. 372 | If you're planning on getting pregnant, it's important to talk to a GP. Although rare, it's possible to transmit HIV to your baby. 373 | Preventing HIV 374 | Anyone who has sex without a condom or shares needles is at risk of HIV infection. 375 | There are many effective ways to prevent or reduce the risk of HIV infection, including: 376 | using a condom for sex 377 | post-exposure prophylaxis (PEP) 378 | pre-exposure prophylaxis (PrEP) 379 | treatment for HIV to reduce the viral load to undetectable 380 | if you use drugs, never sharing needles or other injecting equipment, including syringes, spoons and swabs 381 | Speak to your local sexual health clinic or a GP for further advice about the best way to reduce your risk. 382 | For people with HIV, if you have been taking effective HIV treatment and your viral load has been undetectable for 6 months or more, it means you cannot pass the virus on through sex. 
383 | This is called undetectable=untransmittable (U=U). 384 | Further information on U=U 385 | NAM aidsmap: undetectable equals untransmittable (U=U) consensus statement",https://www.nhs.uk/conditions/HIV/Pages/Introduction.aspx 386 | "Overview 387 | Heart attack 388 | A heart attack (myocardial infarction or MI) is a serious medical emergency in which the supply of blood to the heart is suddenly blocked, usually by a blood clot. 389 | A heart attack is a medical emergency. Call 999 and ask for an ambulance if you suspect a heart attack. 390 | A lack of blood to the heart may seriously damage the heart muscle and can be life threatening. 391 | Symptoms of a heart attack 392 | Symptoms of a heart attack can include: 393 | chest pain – a feeling of pressure, heaviness, tightness or squeezing across your chest 394 | pain in other parts of the body – it can feel as if the pain is spreading from your chest to your arms (usually the left arm, but it can affect both arms), jaw, neck, back and tummy 395 | feeling lightheaded or dizzy 396 | sweating 397 | shortness of breath 398 | feeling sick (nausea) or being sick (vomiting) 399 | an overwhelming feeling of anxiety (similar to a panic attack) 400 | coughing or wheezing 401 | The chest pain is often severe, but some people may only experience minor pain, similar to indigestion. 402 | While the most common symptom in both men and women is chest pain, women are more likely to have other symptoms such as shortness of breath, feeling or being sick and back or jaw pain. 403 | Call 999 immediately if you think someone might be having a heart attack. The faster you act, the better their chances. 404 | Treating heart attacks 405 | While waiting for an ambulance, it may help to chew and then swallow a tablet of aspirin (ideally 300mg), as long as the person having a heart attack is not allergic to aspirin. 406 | Aspirin helps to thin the blood and improves blood flow to the heart. 
407 | In hospital, treatment for a heart attack depends on how serious it is. 408 | The 2 main treatments are: 409 | using medicines to dissolve blood clots 410 | surgery to help restore blood to the heart 411 | Causes of a heart attack 412 | Coronary heart disease (CHD) is the leading cause of heart attacks. 413 | CHD is a condition in which the major blood vessels that supply the heart get clogged with deposits of cholesterol, known as plaques. 414 | Before a heart attack, 1 of the plaques bursts (ruptures), causing a blood clot to develop at the site of the rupture. 415 | The clot may block the supply of blood to the heart, triggering a heart attack. 416 | Recovering from a heart attack 417 | The time it takes to recover from a heart attack will depend on the amount of damage to your heart muscle. 418 | Most people can return to work after having a heart attack. Some people are well enough to return to work after 2 weeks. Other people may take several months to recover. How quickly you can go back to work depends on your health, the state of your heart and the type of work you do. 419 | The recovery process aims to: 420 | reduce your risk of another heart attack through a combination of lifestyle changes (such as eating a healthy diet), and medicines (such as statins), which help to lower blood cholesterol levels 421 | gradually restore your physical fitness so you can resume normal activities (cardiac rehabilitation) 422 | Find out more about recovering from a heart attack 423 | Complications of a heart attack 424 | Complications of a heart attack can be serious and possibly life threatening. 425 | These include: 426 | arrhythmias – these are abnormal heartbeats. 
1 type is where the heart begins beating faster and faster, then stops beating (cardiac arrest) 427 | cardiogenic shock – where the heart's muscles are severely damaged and can no longer contract properly to supply enough blood to maintain many body functions 428 | heart rupture – where the heart's muscles, walls or valves split apart (rupture) 429 | These complications can happen quickly after a heart attack and are a leading cause of death. 430 | Many people die suddenly from a complication of a heart attack before reaching hospital or within the 1st month after a heart attack. 431 | The outlook often depends on: 432 | age – serious complications are more likely as you get older 433 | the severity of the heart attack – how much of the heart's muscle has been damaged during the attack 434 | how long it took before a person received treatment – treatment for a heart attack should begin as soon as possible 435 | Find out more about complications of a heart attack 436 | Preventing a heart attack 437 | There are 5 main steps you can take to reduce your risk of having a heart attack (or having another heart attack): 438 | smokers should quit smoking 439 | lose weight if you're overweight or obese 440 | do regular exercise – adults should do at least 150 minutes (2 hours and 30 minutes) of moderate-intensity aerobic activity each week, unless advised otherwise by the doctor in charge of your care 441 | eat a low-fat, high-fibre diet, including wholegrains and at least 5 portions of fruit and vegetables a day 442 | moderate your alcohol consumption 443 | Video: heart attack 444 | This video explores the symptoms, surgical treatments and importance of reducing risk factors for a heart attack.",https://www.nhs.uk/conditions/Heart-attack/Pages/Introduction.aspx 445 | "Laryngitis 446 | Laryngitis is when your voice box or vocal cords in the throat become irritated or swollen. It usually goes away by itself within 1 to 2 weeks. 
447 | Check if you have laryngitis 448 | Laryngitis usually comes on suddenly and gets worse during the first 3 days. 449 | The main symptoms are: 450 | a hoarse (croaky) voice 451 | sometimes losing your voice 452 | an irritating cough that does not go away 453 | always needing to clear your throat 454 | a sore throat 455 | Children can also: 456 | have a temperature of 38C or above 457 | be off their food or drink 458 | have difficulty breathing (but this is rare) 459 | Laryngitis is often linked to other illnesses, such as colds and flu, so you may also have other symptoms. 460 | If you're not sure it's laryngitis, check other sore throat symptoms. 461 | How you can treat laryngitis yourself 462 | Laryngitis usually goes away on its own after 1 to 2 weeks and you do not need to see a GP. 463 | Do 464 | try to speak as little as possible 465 | drink plenty of fluids 466 | keep the air moist by putting out bowls of water – central heating and air conditioning make the air dry 467 | gargle with warm salty water (children should not try this) 468 | Don’t 469 | do not talk loudly or whisper – both strain your voice 470 | do not smoke 471 | do not spend time in smoky or dusty places 472 | do not drink too much caffeine or alcohol – they cause dehydration 473 | How to gargle with salty water 474 | Dissolve half a teaspoon of salt in a glass of warm water. Warm water helps salt dissolve. 475 | Gargle with the solution then spit it out. Do not swallow it. 476 | Repeat as often as you like. 477 | This is not suitable for younger children. 478 | A pharmacist can help with laryngitis 479 | Speak to a pharmacist about your sore throat. 
480 | They can give advice and suggest treatments, including: 481 | paracetamol or ibuprofen 482 | cough syrup to help with your cough 483 | solutions to gargle or lozenges for the pain 484 | Find a pharmacy 485 | Non-urgent advice: 486 | See a GP if: 487 | your symptoms do not improve after 2 weeks 488 | it's very painful or it's difficult to swallow 489 | you keep getting laryngitis or voice problems 490 | What happens at your appointment 491 | The GP will try to work out what has caused your laryngitis. 492 | They may: 493 | look inside your throat using a small mirror 494 | wipe a cotton bud around the back of your throat for testing 495 | arrange a blood test 496 | refer you to an ear, nose and throat (ENT) specialist (if you keep getting laryngitis) 497 | If your laryngitis is caused by an infection, the GP might prescribe antibiotics. 498 | Immediate action required: 499 | Call 999 or go to A&E if: 500 | you or your child are having difficulty breathing 501 | Find your nearest A&E 502 | What causes laryngitis 503 | Laryngitis usually happens when you have an infection from a virus, such as cold or flu. A flu vaccination will help prevent you getting flu. 504 | Other things that cause laryngitis include: 505 | allergies to things like dust and fumes 506 | acid from your stomach coming up your throat (acid reflux) 507 | coughing over a long time 508 | clearing your throat all the time",https://www.nhs.uk/conditions/Laryngitis/ 509 | "Overview 510 | Multiple sclerosis 511 | Multiple sclerosis (MS) is a condition that can affect the brain and spinal cord, causing a wide range of potential symptoms, including problems with vision, arm or leg movement, sensation or balance. 512 | It's a lifelong condition that can sometimes cause serious disability, although it can occasionally be mild. 513 | In many cases, it's possible to treat symptoms. Average life expectancy is slightly reduced for people with MS. 
514 | It's most commonly diagnosed in people in their 20s, 30s and 40s although it can develop at any age. It's about 2 to 3 times more common in women than men. 515 | MS is one of the most common causes of disability in younger adults. 516 | Symptoms of multiple sclerosis 517 | The symptoms of MS vary widely from person to person and can affect any part of the body. 518 | The main symptoms include: 519 | fatigue 520 | difficulty walking 521 | vision problems, such as blurred vision 522 | problems controlling the bladder 523 | numbness or tingling in different parts of the body 524 | muscle stiffness and spasms 525 | problems with balance and co-ordination 526 | problems with thinking, learning and planning 527 | Depending on the type of MS you have, your symptoms may come and go in phases or get steadily worse over time (progress). 528 | Getting medical advice 529 | See a GP if you're worried you might have signs of MS. 530 | The symptoms often have many other causes, so they're not necessarily a sign of MS. 531 | Let the GP know about the specific pattern of symptoms you're experiencing. 532 | If they think you could have MS, you'll be referred to a specialist in conditions of the nervous system (a neurologist), who may suggest tests such as an MRI scan to check for features of MS. 533 | Find out more about diagnosing MS 534 | Types of multiple sclerosis 535 | MS starts in 1 of 2 general ways: with individual relapses (attacks or exacerbations) or with gradual progression. 536 | Relapsing remitting MS 537 | Between 8 and 9 of every 10 people with MS are diagnosed with the relapsing remitting type. 538 | Someone with relapsing remitting MS will have episodes of new or worsening symptoms, known as relapses. 539 | These typically worsen over a few days, last for days to weeks to months, then slowly improve over a similar time period. 540 | Relapses often occur without warning, but are sometimes associated with a period of illness or stress. 
541 | The symptoms of a relapse may disappear altogether, with or without treatment, although some symptoms often persist, with repeated attacks happening over several years. 542 | Periods between attacks are known as periods of remission. These can last for years at a time. 543 | After many years (usually decades), many, but not all, people with relapsing remitting MS go on to develop secondary progressive MS. 544 | In this type of MS, symptoms gradually worsen over time without obvious attacks. Some people continue to have infrequent relapses during this stage. 545 | About two-thirds of people with relapsing remitting MS will develop secondary progressive MS. 546 | Primary progressive MS 547 | Between 1 and 2 in every 10 people with the condition start their MS with a gradual worsening of symptoms. 548 | In primary progressive MS, symptoms gradually worsen and accumulate over several years, and there are no periods of remission, though people often have periods where their condition appears to stabilise. 549 | What causes multiple sclerosis? 550 | MS is an autoimmune condition. This is when something goes wrong with the immune system and it mistakenly attacks a healthy part of the body – in this case, the brain or spinal cord of the nervous system. 551 | In MS, the immune system attacks the layer that surrounds and protects the nerves called the myelin sheath. 552 | This damages and scars the sheath, and potentially the underlying nerves, meaning that messages travelling along the nerves become slowed or disrupted. 553 | Exactly what causes the immune system to act in this way is unclear, but most experts think a combination of genetic and environmental factors is involved. 554 | Treatments for multiple sclerosis 555 | There's currently no cure for MS, but a number of treatments can help control the condition and ease symptoms. 556 | The treatment you need will depend on the specific symptoms and difficulties you have. 
557 | It may include: 558 | treating relapses with short courses of steroid medicine to speed up recovery 559 | specific treatments for individual MS symptoms 560 | treatment to reduce the number of relapses using medicines called disease-modifying therapies 561 | Disease-modifying therapies may also help to slow or reduce the overall worsening of disability in people with a type of MS called relapsing remitting MS, and in some people with types called primary and secondary progressive MS, who have relapses. 562 | Unfortunately, there's currently no treatment that can slow the progress of primary progressive MS, or secondary progressive MS, where there are no relapses. 563 | Many therapies aiming to treat progressive MS are currently being researched. 564 | Living with multiple sclerosis 565 | If you have been diagnosed with MS, it's important to take care of your general health. 566 | Read more advice about living with MS 567 | Outlook 568 | MS can be a challenging condition to live with, but new treatments over the past 20 years have considerably improved the quality of life of people with the condition. 569 | MS itself is rarely fatal, but complications may arise from severe MS, such as chest or bladder infections, or swallowing difficulties. 570 | The average life expectancy for people with MS is around 5 to 10 years lower than average, and this gap appears to be getting smaller all the time. 571 | Charities and support groups for multiple sclerosis 572 | There are 2 main MS charities in the UK: 573 | MS Society 574 | MS Trust 575 | These organisations offer useful advice, publications, news items about ongoing research, blogs and chatrooms. 576 | They can be very useful if you, or someone you know, has just been diagnosed with MS. 577 | There's also the shift.ms website, an online community for younger people affected by MS. 
578 | Information: 579 | Social care and support guide 580 | The social care and support guide explains your options and where you can get support if you: 581 | need help with day-to-day living because of illness or disability 582 | care for someone regularly because they're ill, elderly or disabled, including family members",https://www.nhs.uk/conditions/Multiple-sclerosis 583 | -------------------------------------------------------------------------------- /data/prompts.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "hash": "f4df95ec69", 4 | "text": "Given the following context: {context}\n\n----------------\nCreate a conversation between a Patient concerned about the symptoms and disorders presented above and an AI-Assistant. The conversation should abide by the following rules and instructions:\n1. The output should be in {language}.\n2. All questions and answers have to be strictly based on the context provided above.\n3. The questions should be diverse and cover different aspects of the context provided above.\n4. When generating the conversation please always use the English placeholders \"Patient\" for the Patient and \"AI-Assistant\" for the AI-Assistant, irrelevant of the language.\n5. The conversation should always be started by the Patient.\n6. The patient should start the conversation with a \"Hi,\".\n7. The AI-Assistant should be helpful, and supportive and try to not scare the patient. The answers have be to detailed and self-contained.", 5 | "description": "This will generate a conversation between a Patient and an AI assistant in the specified languages. 
The only argument for this prompt is the {language} argument, if not provided it will default to English.", 6 | "parser": "medical_conversation_parser" 7 | }, 8 | { 9 | "hash": "f53cf99826", 10 | "text": "Given the following context: {context}\n\n----------------\nCreate {quantity} detailed question-answer pairs from the context above, the questions are asked by a curious user and the answers are by a helpful AI-Assistant. The question-answer pairs should abide by the following rules and instructions:\n1. The output should be in {language}.\n2. The output should be in CSV format, with the following header: ID;Question;Answer\n3. All question/answer pairs have to be strictly based on the context provided above and be self-contained and independent.\n4. The questions should be diverse and cover different aspects of the context provided above.\n5. The answers should be long, extensive, detailed, informative, helpful and self-contained.", 11 | "description": "A general purpose prompt creating question-answer pairs. This prompt takes two arguments: {quantity}, i.e. how many q/a pairs to generate, and {language}", 12 | "parser": "csv_qa_parser" 13 | }, 14 | { 15 | "hash": "5755564c19", 16 | "text": "You are asked to come up with a set of {quantity} diverse task instructions in the field of medicine and healthcare. These task instructions will be given to a Medical GPT model and we will evaluate the Medical GPT model for completing the instructions.\n\nHere are the requirements:\n1. Try not to repeat the verb for each instruction to maximize diversity.\n2. The language used for the instruction also should be diverse. For example, you should combine questions with imperative instructions.\n3. The type of instructions should be diverse. The list should include diverse kinds of tasks like step-by-step reasoning, multiple-choice-questions, open-ended generation, classification, editing, complex medical questions, simple medical questions, etc.\n4. 
A GPT language model should be able to complete the instruction. For example, do not ask the assistant to create any visual or audio output. For another example, do not ask the assistant to wake you up at 5pm or set a reminder because it cannot perform any action.\n5. The instructions should be in {language}.\n6. The instructions should be 1 to 4 sentences long. Either an imperative sentence or a question is permitted.\n7. You should generate an appropriate input to the instruction. The input field should contain a specific example provided for the instruction. It should involve realistic data and should not contain simple placeholders. The input should provide substantial content to make the instruction challenging but should ideally not exceed 300 words.\n8. Not all instructions require input. For example, when an instruction asks about some general information, \"What is diabetes\", it is not necessary to provide a specific context. In this case, we simply put \"\" in the input field.\n9. The output should be an appropriate response to the instruction and the input. It should ideally not exceed 400 words.\n10. 
All generated output should use the metric system for measurements and UK names for medications, substances, drugs and everything else.\n\nList of {quantity} tasks (every task has the following fields: Task:, Instruction:, Input:, Output:):", 17 | "description": "Generates high complexity various medical instruction-tasks", 18 | "parser": null 19 | } 20 | ] -------------------------------------------------------------------------------- /experiments/Dataset Generation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "id": "4c5b7353-c14b-434c-a6c9-3a7c1fde8168", 7 | "metadata": { 8 | "tags": [] 9 | }, 10 | "outputs": [], 11 | "source": [ 12 | "import tiktoken\n", 13 | "import pandas as pd\n", 14 | "import getpass\n", 15 | "import openai\n", 16 | "\n", 17 | "from opengpt.config import Config\n", 18 | "from opengpt.dataset_utils import split_csv_by_max_len, create_dataset" 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "id": "7a16f03d-c27c-4831-8eaa-447bca04bd62", 24 | "metadata": {}, 25 | "source": [ 26 | "## Prompt and domain-data configuration\n", 27 | "\n", 28 | "Make sure the prompts and domain-data are configured properly. Domain-data is the `datasets` part of the `config` file." 
29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 20, 34 | "id": "655a1116-0a27-4d27-ae8c-614b9f559f32", 35 | "metadata": { 36 | "tags": [] 37 | }, 38 | "outputs": [], 39 | "source": [ 40 | "config = Config(yaml_path='../configs/example_config_for_detaset_creation.yaml')" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "id": "85b20ef3-00ce-4dd3-8c53-2d1166fbb6e9", 47 | "metadata": { 48 | "tags": [] 49 | }, 50 | "outputs": [], 51 | "source": [ 52 | "# Based on the teacher and model, you might need to change the tokenizer\n", 53 | "tokenizer = tiktoken.encoding_for_model(config.teacher.model)" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 22, 59 | "id": "f8c37bf8-2ba7-4b98-aa52-f319a45fcd3a", 60 | "metadata": { 61 | "tags": [] 62 | }, 63 | "outputs": [ 64 | { 65 | "data": { 66 | "application/vnd.jupyter.widget-view+json": { 67 | "model_id": "a9d3ec8e030c4f9f81982593d2f68694", 68 | "version_major": 2, 69 | "version_minor": 0 70 | }, 71 | "text/plain": [ 72 | "Datasets: 0%| | 0/1 [00:00\n", 266 | "\n", 279 | "\n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | "
textraw_data_id
0<|user|> What is considered a high blood press...0
1<|user|> What are the risks of having high blo...0
2<|user|> What lifestyle changes can help preve...0
3<|user|> What kind of medicines can help contr...0
4<|user|> What are some things that can increas...0
\n", 315 | "" 316 | ], 317 | "text/plain": [ 318 | " text raw_data_id\n", 319 | "0 <|user|> What is considered a high blood press... 0\n", 320 | "1 <|user|> What are the risks of having high blo... 0\n", 321 | "2 <|user|> What lifestyle changes can help preve... 0\n", 322 | "3 <|user|> What kind of medicines can help contr... 0\n", 323 | "4 <|user|> What are some things that can increas... 0" 324 | ] 325 | }, 326 | "execution_count": 31, 327 | "metadata": {}, 328 | "output_type": "execute_result" 329 | } 330 | ], 331 | "source": [ 332 | "prepared_data.head()" 333 | ] 334 | }, 335 | { 336 | "cell_type": "code", 337 | "execution_count": 30, 338 | "id": "ceb30e42-f079-4d9d-80e4-4c38378d00d1", 339 | "metadata": { 340 | "tags": [] 341 | }, 342 | "outputs": [ 343 | { 344 | "data": { 345 | "text/html": [ 346 | "
\n", 347 | "\n", 360 | "\n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | "
idraw_outputdatasetlanguagerunprompt_hashprompt_text_hashcontext
00ID;Question;Answer\\n1;What is considered a hig...nhs_conditions_small_sampleEnglish0f53cf99826a886c127f3f267e647b41b8f12caf0fac51bec9ff2d908...Overview\\nHigh blood pressure (hypertension)\\n...
11ID;Question;Answer\\n1;What is bronchiolitis an...nhs_conditions_small_sampleEnglish0f53cf99826ca0cc4cd9acd981724d8478f85610d37679383bbb330cb...Bronchiolitis\\nBronchiolitis is a common chest...
22Patient: Hi, I've been coughing for a week now...nhs_conditions_small_sampleEnglish0f4df95ec692bfdb5c693f89fec690df0b78321945e82d630f0dabf98...Bronchitis\\nBronchitis is inflammation of the ...
33Patient: Hi, I have been experiencing joint pa...nhs_conditions_small_sampleEnglish0f4df95ec6914142c96ed663d1313091800a7778df104e50577c26f23...Steroids\\nSteroids, also called corticosteroid...
44ID;Question;Answer\\n1;What are the symptoms of...nhs_conditions_small_sampleEnglish0f53cf99826ed53b30b98e2f13a1b120f459ff0ca70d3e8f305bc8855...Overview\\nCreutzfeldt-Jakob disease\\nCreutzfel...
\n", 432 | "
" 433 | ], 434 | "text/plain": [ 435 | " id raw_output \n", 436 | "0 0 ID;Question;Answer\\n1;What is considered a hig... \\\n", 437 | "1 1 ID;Question;Answer\\n1;What is bronchiolitis an... \n", 438 | "2 2 Patient: Hi, I've been coughing for a week now... \n", 439 | "3 3 Patient: Hi, I have been experiencing joint pa... \n", 440 | "4 4 ID;Question;Answer\\n1;What are the symptoms of... \n", 441 | "\n", 442 | " dataset language run prompt_hash \n", 443 | "0 nhs_conditions_small_sample English 0 f53cf99826 \\\n", 444 | "1 nhs_conditions_small_sample English 0 f53cf99826 \n", 445 | "2 nhs_conditions_small_sample English 0 f4df95ec69 \n", 446 | "3 nhs_conditions_small_sample English 0 f4df95ec69 \n", 447 | "4 nhs_conditions_small_sample English 0 f53cf99826 \n", 448 | "\n", 449 | " prompt_text_hash \n", 450 | "0 a886c127f3f267e647b41b8f12caf0fac51bec9ff2d908... \\\n", 451 | "1 ca0cc4cd9acd981724d8478f85610d37679383bbb330cb... \n", 452 | "2 2bfdb5c693f89fec690df0b78321945e82d630f0dabf98... \n", 453 | "3 14142c96ed663d1313091800a7778df104e50577c26f23... \n", 454 | "4 ed53b30b98e2f13a1b120f459ff0ca70d3e8f305bc8855... \n", 455 | "\n", 456 | " context \n", 457 | "0 Overview\\nHigh blood pressure (hypertension)\\n... \n", 458 | "1 Bronchiolitis\\nBronchiolitis is a common chest... \n", 459 | "2 Bronchitis\\nBronchitis is inflammation of the ... \n", 460 | "3 Steroids\\nSteroids, also called corticosteroid... \n", 461 | "4 Overview\\nCreutzfeldt-Jakob disease\\nCreutzfel... 
" 462 | ] 463 | }, 464 | "execution_count": 30, 465 | "metadata": {}, 466 | "output_type": "execute_result" 467 | } 468 | ], 469 | "source": [ 470 | "raw_data.head()" 471 | ] 472 | } 473 | ], 474 | "metadata": { 475 | "kernelspec": { 476 | "display_name": "Python 3 (ipykernel)", 477 | "language": "python", 478 | "name": "python3" 479 | }, 480 | "language_info": { 481 | "codemirror_mode": { 482 | "name": "ipython", 483 | "version": 3 484 | }, 485 | "file_extension": ".py", 486 | "mimetype": "text/x-python", 487 | "name": "python", 488 | "nbconvert_exporter": "python", 489 | "pygments_lexer": "ipython3", 490 | "version": "3.8.0" 491 | } 492 | }, 493 | "nbformat": 4, 494 | "nbformat_minor": 5 495 | } 496 | -------------------------------------------------------------------------------- /experiments/Prompt Creation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 27, 6 | "id": "77303897-4f09-4726-ad0a-e177747c82fd", 7 | "metadata": { 8 | "tags": [] 9 | }, 10 | "outputs": [], 11 | "source": [ 12 | "from opengpt.prompt_utils import add_to_prompt_database\n", 13 | "from opengpt.config import Config\n", 14 | "import hashlib\n", 15 | "import json" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 3, 21 | "id": "a8e3f831-bf93-416a-9f2d-f5326a4bc20f", 22 | "metadata": { 23 | "tags": [] 24 | }, 25 | "outputs": [], 26 | "source": [ 27 | "config = Config(yaml_path='../configs/example_config_for_detaset_creation.yaml')" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "id": "0ae8d8a7-926d-4ab5-8266-024b28bc7304", 33 | "metadata": {}, 34 | "source": [ 35 | "## How to add prompts into the prompt database\n", 36 | "\n", 37 | "Here we will add three prompts to the prompt database, the same template can be used to add any prompt. 
\n", 38 | "\n", 39 | "Each prompt requires three fields:\n", 40 | "\n", 41 | "- `text` - The text of the prompt that will be sent to OpenAI (ChatGPT, or GPT-4, or any other model that can be used to create a dataset). The prompt needs to have at least two placeholders. First, `context` - the content of a document that will be used to generate question/answer pairs, or a conversation, or anything else. And second, `language` - the language we want the output to be in.\n", 42 | "\n", 43 | "- `description` - A short description of what this prompt is for and how to use it.\n", 44 | "\n", 45 | "- `parser` - The parser to be used to parse the output from a Teacher (e.g. OpenAI, Google).\n", 46 | "\n", 47 | "Notes:\n", 48 | "- When using the `csv_*` parsers the separator \";\" has to be used." 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": null, 54 | "id": "4127e8b4-b9e3-4e25-a20e-18415f530340", 55 | "metadata": { 56 | "tags": [] 57 | }, 58 | "outputs": [], 59 | "source": [ 60 | "# Check what is already in the prompt DB, use existing prompts or add new ones \n", 61 | "if os.path.exists(config.path.prompt_db):\n", 62 | " db = json.load(open(config.path.prompt_db))\n", 63 | " for prompt in db:\n", 64 | " print('Description: ', prompt['description'])\n", 65 | " print('Hash: ', prompt['hash'])\n", 66 | " print('Parser: ', prompt['parser'])\n", 67 | " print('Text: ', prompt['text'])\n", 68 | " print(\"*\"*100)\n", 69 | " print()" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 11, 75 | "id": "2a3e445f-4454-4209-950c-d4ee8be40ecc", 76 | "metadata": { 77 | "tags": [] 78 | }, 79 | "outputs": [], 80 | "source": [ 81 | "text = '''Given the following context: {context}\n", 82 | "\n", 83 | "----------------\n", 84 | "Create a conversation between a Patient concerned about the symptoms and disorders presented above and an AI-Assistant. The conversation should abide by the following rules and instructions:\n", 85 | "1. 
The output should be in {language}.\n", 86 | "2. All questions and answers have to be strictly based on the context provided above.\n", 87 | "3. The questions should be diverse and cover different aspects of the context provided above.\n", 88 | "4. When generating the conversation please always use the English placeholders \"Patient\" for the Patient and \"AI-Assistant\" for the AI-Assistant, irrelevant of the language.\n", 89 | "5. The conversation should always be started by the Patient.\n", 90 | "6. The patient should start the conversation with a \"Hi,\".\n", 91 | "7. The AI-Assistant should be helpful, and supportive and try to not scare the patient. The answers have be to detailed and self-contained.'''\n", 92 | "description = '''This will generate a conversation between a Patient and an AI assistant in the specified languages. The only argument for this prompt is the {language} argument, if not provided it will default to English.'''\n", 93 | "parser = 'medical_conversation_parser'" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 12, 99 | "id": "a4d2789d-811d-4dc3-8371-98cd8987fb79", 100 | "metadata": { 101 | "tags": [] 102 | }, 103 | "outputs": [ 104 | { 105 | "name": "stderr", 106 | "output_type": "stream", 107 | "text": [ 108 | "WARNING:root:Added prompt: f4df95ec69\n" 109 | ] 110 | } 111 | ], 112 | "source": [ 113 | "db = add_to_prompt_database(text, \n", 114 | " description, \n", 115 | " parser, \n", 116 | " config.path.prompt_db, \n", 117 | " force_replace=False)" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": 13, 123 | "id": "3a09bce3-a264-4d3d-bbac-6a5c8a148ae0", 124 | "metadata": { 125 | "tags": [] 126 | }, 127 | "outputs": [], 128 | "source": [ 129 | "text = '''Given the following context: {context}\n", 130 | "\n", 131 | "----------------\n", 132 | "Create {quantity} detailed question-answer pairs from the context above, the questions are asked by a curious user and the answers are by a helpful 
AI-Assistant. The question-answer pairs should abide by the following rules and instructions:\n", 133 | "1. The output should be in {language}.\n", 134 | "2. The output should be in CSV format, with the following header: ID;Question;Answer\n", 135 | "3. All question/answer pairs have to be strictly based on the context provided above and be self-contained and independent.\n", 136 | "4. The questions should be diverse and cover different aspects of the context provided above.\n", 137 | "5. The answers should be long, extensive, detailed, informative, helpful and self-contained.'''\n", 138 | "description = 'A general purpose prompt creating qustion answer pairs. This prompt takes two arguments {quantity} or how many q/a pairs to generate and {language}'\n", 139 | "parser = 'csv_qa_parser'" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": 14, 145 | "id": "588ed831-144b-4cf3-8088-2f7da264ed1f", 146 | "metadata": { 147 | "tags": [] 148 | }, 149 | "outputs": [ 150 | { 151 | "name": "stderr", 152 | "output_type": "stream", 153 | "text": [ 154 | "WARNING:root:Added prompt: f53cf99826\n" 155 | ] 156 | } 157 | ], 158 | "source": [ 159 | "db = add_to_prompt_database(text, \n", 160 | " description, \n", 161 | " parser, \n", 162 | " config.path.prompt_db, \n", 163 | " force_replace=False)" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": 15, 169 | "id": "0b877351-fd90-4728-b4a0-adf00d986d4f", 170 | "metadata": { 171 | "tags": [] 172 | }, 173 | "outputs": [], 174 | "source": [ 175 | "text = '''You are asked to come up with a set of {quantity} diverse task instructions in the field of medicine and healthcare. These task instructions will be given to a Medical GPT model and we will evaluate the Medical GPT model for completing the instructions.\n", 176 | "\n", 177 | "Here are the requirements:\n", 178 | "1. Try not to repeat the verb for each instruction to maximize diversity.\n", 179 | "2. 
The language used for the instruction also should be diverse. For example, you should combine questions with imperative instructions.\n", 180 | "3. The type of instructions should be diverse. The list should include diverse kinds of tasks like step-by-step reasoning, multiple-choice-questions, open-ended generation, classification, editing, complex medical questions, simple medical questions, etc.\n", 181 | "4. A GPT language model should be able to complete the instruction. For example, do not ask the assistant to create any visual or audio output. For another example, do not ask the assistant to wake you up at 5pm or set a reminder because it cannot perform any action.\n", 182 | "5. The instructions should be in {language}.\n", 183 | "6. The instructions should be 1 to 4 sentences long. Either an imperative sentence or a question is permitted.\n", 184 | "7. You should generate an appropriate input to the instruction. The input field should contain a specific example provided for the instruction. It should involve realistic data and should not contain simple placeholders. The input should provide substantial content to make the instruction challenging but should ideally not exceed 300 words.\n", 185 | "8. Not all instructions require input. For example, when an instruction asks about some general information, \"What is diabetes\", it is not necessary to provide a specific context. In this case, we simply put \"\" in the input field.\n", 186 | "9. The output should be an appropriate response to the instruction and the input. It should ideally not exceed 400 words.\n", 187 | "10. 
All generated output should use the metric system for measurements and UK names for medications, substances, drugs and everything else.\n", 188 | "\n", 189 | "List of {quantity} tasks (every task has the following fields: Task:, Instruction:, Input:, Output:):'''\n", 190 | "description = '''Generates high complexity various medical instruction-tasks'''\n", 191 | "parser = None" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": 16, 197 | "id": "9502955a-2710-427c-aab0-aa6c20990021", 198 | "metadata": { 199 | "tags": [] 200 | }, 201 | "outputs": [ 202 | { 203 | "name": "stderr", 204 | "output_type": "stream", 205 | "text": [ 206 | "WARNING:root:Added prompt: 5755564c19\n" 207 | ] 208 | } 209 | ], 210 | "source": [ 211 | "db = add_to_prompt_database(text, \n", 212 | " description, \n", 213 | " parser, \n", 214 | " config.path.prompt_db, \n", 215 | " force_replace=False)" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": 25, 221 | "id": "ca6a923c-9e34-42d7-a93d-039c3908252f", 222 | "metadata": { 223 | "tags": [] 224 | }, 225 | "outputs": [ 226 | { 227 | "name": "stdout", 228 | "output_type": "stream", 229 | "text": [ 230 | "Description: This will generate a conversation between a Patient and an AI assistant in the specified languages. The only argument for this prompt is the {language} argument, if not provided it will default to English.\n", 231 | "Hash: f4df95ec69\n", 232 | "Parser: medical_conversation_parser\n", 233 | "Text: Given the following context: {context}\n", 234 | "\n", 235 | "----------------\n", 236 | "Create a conversation between a Patient concerned about the symptoms and disorders presented above and an AI-Assistant. The conversation should abide by the following rules and instructions:\n", 237 | "1. The output should be in {language}.\n", 238 | "2. All questions and answers have to be strictly based on the context provided above.\n", 239 | "3. 
The questions should be diverse and cover different aspects of the context provided above.\n", 240 | "4. When generating the conversation please always use the English placeholders \"Patient\" for the Patient and \"AI-Assistant\" for the AI-Assistant, irrelevant of the language.\n", 241 | "5. The conversation should always be started by the Patient.\n", 242 | "6. The patient should start the conversation with a \"Hi,\".\n", 243 | "7. The AI-Assistant should be helpful, and supportive and try to not scare the patient. The answers have be to detailed and self-contained.\n", 244 | "****************************************************************************************************\n", 245 | "\n", 246 | "Description: A general purpose prompt creating qustion answer pairs. This prompt takes two arguments {quantity} or how many q/a pairs to generate and {language}\n", 247 | "Hash: f53cf99826\n", 248 | "Parser: csv_qa_parser\n", 249 | "Text: Given the following context: {context}\n", 250 | "\n", 251 | "----------------\n", 252 | "Create {quantity} detailed question-answer pairs from the context above, the questions are asked by a curious user and the answers are by a helpful AI-Assistant. The question-answer pairs should abide by the following rules and instructions:\n", 253 | "1. The output should be in {language}.\n", 254 | "2. The output should be in CSV format, with the following header: ID;Question;Answer\n", 255 | "3. All question/answer pairs have to be strictly based on the context provided above and be self-contained and independent.\n", 256 | "4. The questions should be diverse and cover different aspects of the context provided above.\n", 257 | "5. 
The answers should be long, extensive, detailed, informative, helpful and self-contained.\n", 258 | "****************************************************************************************************\n", 259 | "\n", 260 | "Description: Generates high complexity various medical instruction-tasks\n", 261 | "Hash: 5755564c19\n", 262 | "Parser: None\n", 263 | "Text: You are asked to come up with a set of {quantity} diverse task instructions in the field of medicine and healthcare. These task instructions will be given to a Medical GPT model and we will evaluate the Medical GPT model for completing the instructions.\n", 264 | "\n", 265 | "Here are the requirements:\n", 266 | "1. Try not to repeat the verb for each instruction to maximize diversity.\n", 267 | "2. The language used for the instruction also should be diverse. For example, you should combine questions with imperative instructions.\n", 268 | "3. The type of instructions should be diverse. The list should include diverse kinds of tasks like step-by-step reasoning, multiple-choice-questions, open-ended generation, classification, editing, complex medical questions, simple medical questions, etc.\n", 269 | "4. A GPT language model should be able to complete the instruction. For example, do not ask the assistant to create any visual or audio output. For another example, do not ask the assistant to wake you up at 5pm or set a reminder because it cannot perform any action.\n", 270 | "5. The instructions should be in {language}.\n", 271 | "6. The instructions should be 1 to 4 sentences long. Either an imperative sentence or a question is permitted.\n", 272 | "7. You should generate an appropriate input to the instruction. The input field should contain a specific example provided for the instruction. It should involve realistic data and should not contain simple placeholders. The input should provide substantial content to make the instruction challenging but should ideally not exceed 300 words.\n", 273 | "8. 
Not all instructions require input. For example, when an instruction asks about some general information, \"What is diabetes\", it is not necessary to provide a specific context. In this case, we simply put \"\" in the input field.\n", 274 | "9. The output should be an appropriate response to the instruction and the input. It should ideally not exceed 400 words.\n", 275 | "10. All generated output should use the metric system for measurements and UK names for medications, substances, drugs and everything else.\n", 276 | "\n", 277 | "List of {quantity} tasks (every task has the following fields: Task:, Instruction:, Input:, Output:):\n", 278 | "****************************************************************************************************\n", 279 | "\n" 280 | ] 281 | } 282 | ], 283 | "source": [ 284 | "# Print the prompt db\n", 285 | "for prompt in db:\n", 286 | " print('Description: ', prompt['description'])\n", 287 | " print('Hash: ', prompt['hash'])\n", 288 | " print('Parser: ', prompt['parser'])\n", 289 | " print('Text: ', prompt['text'])\n", 290 | " print(\"*\"*100)\n", 291 | " print()" 292 | ] 293 | }, 294 | { 295 | "cell_type": "code", 296 | "execution_count": null, 297 | "id": "2d4a2a9b-4de8-488e-9914-8181ec218935", 298 | "metadata": {}, 299 | "outputs": [], 300 | "source": [] 301 | } 302 | ], 303 | "metadata": { 304 | "kernelspec": { 305 | "display_name": "Python 3 (ipykernel)", 306 | "language": "python", 307 | "name": "python3" 308 | }, 309 | "language_info": { 310 | "codemirror_mode": { 311 | "name": "ipython", 312 | "version": 3 313 | }, 314 | "file_extension": ".py", 315 | "mimetype": "text/x-python", 316 | "name": "python", 317 | "nbconvert_exporter": "python", 318 | "pygments_lexer": "ipython3", 319 | "version": "3.8.0" 320 | } 321 | }, 322 | "nbformat": 4, 323 | "nbformat_minor": 5 324 | } 325 | -------------------------------------------------------------------------------- /experiments/Supervised Training.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "id": "6f859a45-5ca7-47cc-8055-dabedd301963", 7 | "metadata": { 8 | "tags": [] 9 | }, 10 | "outputs": [ 11 | { 12 | "name": "stderr", 13 | "output_type": "stream", 14 | "text": [ 15 | "2023-05-08 18:13:35.960625: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", 16 | "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", 17 | "2023-05-08 18:13:37.052768: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n" 18 | ] 19 | } 20 | ], 21 | "source": [ 22 | "from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments, pipeline\n", 23 | "import pickle\n", 24 | "import pandas as pd\n", 25 | "import datasets\n", 26 | "\n", 27 | "\n", 28 | "from opengpt.config import Config\n", 29 | "from opengpt.model_utils import add_tokens_to_model_and_tokenizer\n", 30 | "from opengpt.dataset_utils import create_labels, pack_examples\n", 31 | "from opengpt.data_collator import DataCollatorWithPadding" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 4, 37 | "id": "dd137647-a6d7-49c8-a241-404137ef3b08", 38 | "metadata": { 39 | "tags": [] 40 | }, 41 | "outputs": [], 42 | "source": [ 43 | "config = Config(yaml_path='../configs/example_train_config.yaml')\n", 44 | "model = AutoModelForCausalLM.from_pretrained(config.train.model)\n", 45 | "tokenizer = AutoTokenizer.from_pretrained(config.train.model)\n", 46 | "tokenizer.model_max_length = config.train.max_seq_len" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 5, 52 | "id": "5c389455-d401-4857-a61d-0cc1e72d312b", 53 | "metadata": {}, 54 | "outputs": [ 55 | { 56 | "name": "stderr", 
57 | "output_type": "stream", 58 | "text": [ 59 | "WARNING:root:Added: 5 tokens to the tokenizer\n" 60 | ] 61 | } 62 | ], 63 | "source": [ 64 | "add_tokens_to_model_and_tokenizer(config, tokenizer, model)" 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "id": "bdb0bac3-1504-430f-a28c-0aea4ea28bbc", 70 | "metadata": {}, 71 | "source": [ 72 | "### Load data" 73 | ] 74 | }, 75 | { 76 | "cell_type": "markdown", 77 | "id": "b0f6ef4f-3c62-47f8-a717-9d0a57d086ad", 78 | "metadata": {}, 79 | "source": [ 80 | "The datasets used for training have to have the special tokens as defined in the config. By default this means that the datasets have to be organised as conversations using the `<|user|> <|ai|>` and `<|eos|> <|eod|>` special tokens. An example of a question/answer pair from the NHS-UK dataset:\n", 81 | "\n", 82 | "```\n", 83 | "<|user|> What is high blood pressure? <|eos|> <|ai|> High blood pressure is a condition where the force at which your heart pumps blood around your body is high. It is recorded with 2 numbers, the systolic pressure and the diastolic pressure, both measured in millimetres of mercury (mmHg).\n", 84 | "References:\n", 85 | "- https://www.nhs.uk/conditions/Blood-pressure-(high)/Pages/Introduction.aspx <|eos|> <|eod|>\n", 86 | "```\n", 87 | "\n", 88 | "If not done the training scripts below will not work." 
89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 8, 94 | "id": "6a2589a6-2370-4b23-98fc-91ebcf6b24aa", 95 | "metadata": { 96 | "tags": [] 97 | }, 98 | "outputs": [ 99 | { 100 | "name": "stdout", 101 | "output_type": "stream", 102 | "text": [ 103 | "Shuffling dataset\n" 104 | ] 105 | } 106 | ], 107 | "source": [ 108 | "train_dataset = datasets.Dataset.from_csv(config.train.datasets)\n", 109 | "if config.train.shuffle_dataset:\n", 110 | " train_dataset = train_dataset.shuffle()\n", 111 | " print(\"Shuffling dataset!\")" 112 | ] 113 | }, 114 | { 115 | "cell_type": "markdown", 116 | "id": "11ab2eea-65d7-4b9e-934a-4179608bd6f4", 117 | "metadata": {}, 118 | "source": [ 119 | "#### Remove all columns that we do not need, filtering of the dataset can be done before removal if needed" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": 9, 125 | "id": "75b45ea0-c435-4193-aa1b-622a062b4386", 126 | "metadata": { 127 | "tags": [] 128 | }, 129 | "outputs": [], 130 | "source": [ 131 | "# Remove everything but text\n", 132 | "to_remove = list(train_dataset.column_names)\n", 133 | "to_remove.remove('text')\n", 134 | "train_dataset = train_dataset.remove_columns(to_remove)" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": 11, 140 | "id": "e270be31-1643-42c7-8dc1-a9206b88a243", 141 | "metadata": { 142 | "tags": [] 143 | }, 144 | "outputs": [ 145 | { 146 | "data": { 147 | "application/vnd.jupyter.widget-view+json": { 148 | "model_id": "", 149 | "version_major": 2, 150 | "version_minor": 0 151 | }, 152 | "text/plain": [ 153 | "Map: 0%| | 0/29660 [00:00\n", 251 | " \n", 252 | " \n", 253 | " [548/548 06:27, Epoch 0/1]\n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " 
\n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | "
StepTraining Loss
1001.650500
2001.489700
3001.445900
4001.416500
5001.399800

" 285 | ], 286 | "text/plain": [ 287 | "" 288 | ] 289 | }, 290 | "metadata": {}, 291 | "output_type": "display_data" 292 | }, 293 | { 294 | "data": { 295 | "text/plain": [ 296 | "TrainOutput(global_step=548, training_loss=1.4748950457050853, metrics={'train_runtime': 388.7346, 'train_samples_per_second': 22.563, 'train_steps_per_second': 1.41, 'total_flos': 1931665648896000.0, 'train_loss': 1.4748950457050853, 'epoch': 1.0})" 297 | ] 298 | }, 299 | "execution_count": 17, 300 | "metadata": {}, 301 | "output_type": "execute_result" 302 | } 303 | ], 304 | "source": [ 305 | "trainer.train()" 306 | ] 307 | }, 308 | { 309 | "cell_type": "markdown", 310 | "id": "11a88494-afb1-44a5-b607-6382fe5b0c9c", 311 | "metadata": {}, 312 | "source": [ 313 | "# Test Generation" 314 | ] 315 | }, 316 | { 317 | "cell_type": "code", 318 | "execution_count": 18, 319 | "id": "2d37cb47-c029-40f5-bc7d-accefae42f50", 320 | "metadata": { 321 | "tags": [] 322 | }, 323 | "outputs": [], 324 | "source": [ 325 | "gen = pipeline(model=model, tokenizer=tokenizer, task='text-generation', device=model.device)" 326 | ] 327 | }, 328 | { 329 | "cell_type": "code", 330 | "execution_count": 19, 331 | "id": "86ce8f6e-6037-4c2b-8aac-5ad7a129834a", 332 | "metadata": { 333 | "tags": [] 334 | }, 335 | "outputs": [], 336 | "source": [ 337 | "t = \"<|user|> What is diabetes? <|eos|> <|ai|>\" # The format with special tokens is required, because of training" 338 | ] 339 | }, 340 | { 341 | "cell_type": "code", 342 | "execution_count": 39, 343 | "id": "daab006f-eda3-43db-b864-a1c1de52d4d3", 344 | "metadata": { 345 | "tags": [] 346 | }, 347 | "outputs": [ 348 | { 349 | "name": "stderr", 350 | "output_type": "stream", 351 | "text": [ 352 | "Setting `pad_token_id` to `eos_token_id`:50267 for open-end generation.\n" 353 | ] 354 | }, 355 | { 356 | "name": "stdout", 357 | "output_type": "stream", 358 | "text": [ 359 | "<|user|> What is diabetes? 
<|eos|> <|ai|> Diabetes is a condition in which the body's insulin levels are too low, which can lead to high blood sugar levels.\n", 360 | "References:\n", 361 | "- https://www.nhs.uk/conditions/diabetes/ \n" 362 | ] 363 | } 364 | ], 365 | "source": [ 366 | "# Temperature is important, and depending on your model different values will be good (this one is for gpt-2)\n", 367 | "print(gen(t, do_sample=True, max_length=128, temperature=0.2)[0]['generated_text'])" 368 | ] 369 | } 370 | ], 371 | "metadata": { 372 | "kernelspec": { 373 | "display_name": "Python 3 (ipykernel)", 374 | "language": "python", 375 | "name": "python3" 376 | }, 377 | "language_info": { 378 | "codemirror_mode": { 379 | "name": "ipython", 380 | "version": 3 381 | }, 382 | "file_extension": ".py", 383 | "mimetype": "text/x-python", 384 | "name": "python", 385 | "nbconvert_exporter": "python", 386 | "pygments_lexer": "ipython3", 387 | "version": "3.8.0" 388 | } 389 | }, 390 | "nbformat": 4, 391 | "nbformat_minor": 5 392 | } 393 | -------------------------------------------------------------------------------- /llama_train_requirements.txt: -------------------------------------------------------------------------------- 1 | . 
2 | protobuf==3.20.3 3 | accelerate 4 | https://github.com/huggingface/transformers 5 | sentencepiece 6 | -------------------------------------------------------------------------------- /opengpt/config.py: -------------------------------------------------------------------------------- 1 | from box import Box 2 | import jsonpickle 3 | import os 4 | import yaml 5 | 6 | class BaseConfig(object): 7 | def __init__(self, to_box=False): 8 | pass 9 | 10 | def _to_box(self): 11 | # Convert all dicts to boxes 12 | for key, val in self.__dict__.items(): 13 | if isinstance(val, dict): 14 | self.__setattr__(key, Box(val)) 15 | 16 | def _from_box(self): 17 | # Convert all dicts to boxes 18 | for key, val in self.__dict__.items(): 19 | if isinstance(val, Box): 20 | self.__setattr__(key, val.to_dict()) 21 | 22 | def save(self, save_path=None): 23 | r''' Save the config into a .json file 24 | Args: 25 | save_path (`str`): 26 | Where to save the created json file, if nothing we use the default from paths. 27 | ''' 28 | if save_path is None: 29 | save_path = self.path.self 30 | 31 | # We want to save the dict here, not the whole class 32 | self._from_box() 33 | json_string = jsonpickle.encode({k:v for k,v in self.__dict__.items() if k != 'path'}) 34 | 35 | with open(save_path, 'w') as f: 36 | f.write(json_string) 37 | self._to_box() 38 | 39 | @classmethod 40 | def load(cls, save_path): 41 | config = cls(to_box=False) 42 | # Read the jsonpickle string 43 | with open(save_path) as f: 44 | config_dict = jsonpickle.decode(f.read()) 45 | config.merge_config(config_dict) 46 | config._to_box() 47 | return config 48 | 49 | def merge_config(self, config_dict): 50 | r''' Merge a config_dict with the existing config object. 51 | Args: 52 | config_dict (`dict`): 53 | A dictionary which key/values should be added to this class. 
class DataCollatorWithPadding(object):
    r''' Will pad or trim examples to the appropriate length.

    Every example is first trimmed to `max_seq_len` tokens and the batch is then padded
    (on the right) to the length of the longest remaining example.

    Args:
        pad_token_id (`int`):
            Value used to pad `input_ids`.
        ignore_index (`int`):
            Value used to pad `labels`.
        max_seq_len (`int`):
            Maximum number of tokens kept per example, anything longer is trimmed.
    '''
    def __init__(self, pad_token_id, ignore_index, max_seq_len):
        self.pad_token_id = pad_token_id
        self.ignore_index = ignore_index
        self.max_seq_len = max_seq_len

    def __call__(self, instances):
        r''' Collate a list of examples into one padded batch.

        Args:
            instances (`list`):
                Each element has to provide `input_ids` and `labels` sequences.

        Returns:
            `dict` with the tensors `input_ids`, `labels` and `attention_mask` (boolean,
            `True` on real tokens and `False` on padding).
        '''
        input_ids = [torch.tensor(instance['input_ids'][0:self.max_seq_len]) for instance in instances]
        labels = [torch.tensor(instance['labels'][0:self.max_seq_len]) for instance in instances]

        batch = {}
        batch['input_ids'] = torch.nn.utils.rnn.pad_sequence(input_ids, batch_first=True, padding_value=self.pad_token_id)
        batch['labels'] = torch.nn.utils.rnn.pad_sequence(labels, batch_first=True, padding_value=self.ignore_index)

        # Build the mask from the real sequence lengths and not by comparing against
        # pad_token_id, otherwise a genuine token that shares its id with the padding
        # token (e.g. when pad == eos) would be masked out of attention.
        lengths = torch.tensor([len(ids) for ids in input_ids])
        positions = torch.arange(batch['input_ids'].shape[1])
        batch['attention_mask'] = positions.unsqueeze(0) < lengths.unsqueeze(1)

        return batch
def create_dataset_no_input(config):
    r''' This does not require an input dataset to generate a new dataset, only a prompt is needed.

    Every prompt selected in `config.prompts` is formatted with its `extra_parameters` (plus the
    current `language`) and sent to the teacher `ask_<config.teacher.name>` until `runs` outputs
    exist for that prompt. If a raw CSV from a previous run is found the generation continues from it.

    Args:
        config:
            The config object, the following fields are read: `path.prompt_db`, `base_path`, `name`,
            `teacher.name`, `prompts` and `data_generation_checkpoint_every`.

    Returns:
        `pandas.DataFrame` with the columns `id`, `raw_output` and `prompt_hash`, the same data is
        also written to `<base_path>/<name>/raw_generated_data_for_<name>.csv` when it is not empty.
    '''
    # Use a context manager so that the prompt database file handle is always closed
    with open(config.path.prompt_db, 'rb') as f:
        prompt_db = json.load(f)

    raw_data_columns = ['id', 'raw_output', 'prompt_hash']
    raw_data = pd.DataFrame(None, columns=raw_data_columns)
    raw_data_path = os.path.join(config.base_path, config.name, f"raw_generated_data_for_{config.name}.csv")
    if os.path.exists(raw_data_path):
        raw_data = pd.read_csv(raw_data_path)
        logging.warning(f"Loading an existing openai generated dataset found at: {raw_data_path}\n" +
                        f"There are already {len(raw_data)} rows in the that dataset, the generation will continue from where last left off. " +
                        f"The script will also do all examples that were not done in the previous run.")

    # Checkpoints and the final save write into this directory, make sure it exists up front
    output_dir = os.path.dirname(raw_data_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    teacher = getattr(teachers, f'ask_{config.teacher.name}')
    for prompt_config in config.prompts:
        prompts = [prompt for prompt in prompt_db if prompt['hash'] in prompt_config['hashes']] # There must be one

        # Same default as in create_dataset, a missing `runs` means one run
        n_runs = prompt_config.get('runs', 1)
        # Work on a copy, adding `language` below must not modify the dict stored in the config
        parameters = dict(prompt_config.get('extra_parameters') or {})

        for language in prompt_config.get('languages', ['English']):
            parameters['language'] = language
            logging.warning(f"\nStarting prompts: {prompt_config['hashes']}\n #Runs: {n_runs}\nLanguage: {language}")
            for prompt in prompts:
                # If some examples exist already we only generate what is missing.
                # NOTE(review): existing rows are counted by prompt hash only (the raw data has no
                # language column), so with more than one language the rows generated for the first
                # language also count towards the following ones - confirm this is intended.
                start = len(raw_data[raw_data.prompt_hash == prompt['hash']])
                remaining = max(n_runs - start, 0)  # never hand a negative total to the progress bar
                for _ in tqdm(range(remaining), total=remaining):
                    prompt_text = prompt['text'].format(**parameters)
                    try:
                        out = teacher(prompt_text, config)
                        new_data = pd.DataFrame([[len(raw_data), out, prompt['hash']]], columns=raw_data_columns)
                        raw_data = pd.concat([raw_data, new_data], ignore_index=True)

                        if len(raw_data) % config.data_generation_checkpoint_every == 0:
                            logging.warning("Checkpointing the generated dataset.")
                            raw_data.to_csv(raw_data_path, index=False)

                    except Exception as e:
                        # Best effort: one failed teacher call must not stop the whole generation
                        logging.exception(e)
                        logging.warning(f"Skipping example for prompt: {prompt['hash']}\n")

    if raw_data is not None and len(raw_data) > 0:
        raw_data.to_csv(raw_data_path, index=False)

    return raw_data
config.name, f"raw_generated_data_for_{config.name}.csv") 101 | prepared_data_path = os.path.join(config.base_path, config.name, f"prepared_generated_data_for_{config.name}.csv") 102 | if os.path.exists(raw_data_path) and os.path.exists(prepared_data_path): 103 | raw_data = pd.read_csv(raw_data_path) 104 | prepared_data = pd.read_csv(prepared_data_path) 105 | logging.warning(f"Loading an existing openai generated dataset found at: \n{raw_data_path}\n and\n{prepared_data_path}\n" + 106 | f"There are already {len(raw_data)} rows in the that dataset, the generation will continue from where last left off. " + 107 | f"The script will also do all examples that were not done in the previous run.\n" + 108 | "***Take care that if prompt_config['random_prompt'] is set to true, it can produce unwanted results.\n\n") 109 | 110 | cnt = 0 111 | for prompt_config in config.prompts: 112 | prompts = [prompt for prompt in prompt_db if prompt['hash'] in prompt_config['hashes']] # There must be one 113 | teacher = getattr(teachers, f'ask_{config.teacher.name}') 114 | 115 | for run in range(prompt_config.get('runs', 1)): 116 | parameters = prompt_config.get('extra_parameters', {}) 117 | extra_data_columns = prompt_config.get('extra_data_columns', []) 118 | 119 | for language in prompt_config.get('languages', ['English']): 120 | parameters['language'] = language 121 | logging.warning(f"\nStarting prompts: {prompt_config['hashes']}\nRun: {run}\nLanguage: {language}") 122 | for dataset_name in prompt_config['datasets']: 123 | df = pd.read_csv(os.path.join(config.base_path, dataset_name, 'data_split_by_length.csv')) 124 | for row_ind, row in tqdm(df.iterrows(), desc=dataset_name, total=len(df)): 125 | # Set the context from the current row 126 | parameters['context'] = row['text'] 127 | for col in extra_data_columns: 128 | parameters[col] = row[col] 129 | if prompt_config.get('random_prompt', False): 130 | # This means for each example in the dataset we randomly select a prompt to be used, 
if False 131 | #every example will run through every prompt 132 | selected_prompts = [random.choice(prompts)] 133 | else: 134 | selected_prompts = prompts # Use all prompts sequentially 135 | for prompt in selected_prompts: 136 | prompt_text_template = prompt['text'] 137 | # Every prompt has its own parser 138 | parser = getattr(parsers, prompt['parser']) 139 | if len(str(row['text']).split(" ")) > config.teacher.min_len: 140 | prompt_text = prompt_text_template.format(**parameters) 141 | # The hash is of everything that is used to generate the output 142 | h = hashlib.sha256(prompt_text.encode("utf-8")) 143 | h.update(str(run).encode("utf-8")) 144 | h = h.hexdigest() 145 | 146 | # Only get the output if this was not done already 147 | if h not in raw_data.prompt_text_hash.values: 148 | # Get output from OpenAI and parse using parser, the parser will append the parsed data onto the prepared_data CSV. 149 | try: 150 | openai_output = teacher(prompt_text, config) 151 | prepared_data = parser(data=openai_output, prepared_data=prepared_data, prompt_config=prompt_config, config=config, row=row, 152 | raw_data_id=len(raw_data), prompt_text=prompt_text) # ID is length of raw_data 153 | 154 | # Concat the current output to the data dataframe, only if not None 155 | if prepared_data is not None and len(prepared_data) > 0: 156 | new_data = pd.DataFrame([[len(raw_data), openai_output, dataset_name, language, run, prompt['hash'], h, parameters['context']]], 157 | columns=raw_data_columns) 158 | raw_data = pd.concat([raw_data, new_data], ignore_index=True) 159 | if len(raw_data) % config.data_generation_checkpoint_every == 0: 160 | logging.warning("Checkpointing the generated dataset.") 161 | raw_data.to_csv(raw_data_path, index=False) 162 | prepared_data.to_csv(prepared_data_path, index=False) 163 | except Exception as e: 164 | logging.exception(e) 165 | logging.warning(f"Skipping example at position: {row_ind} for dataset: {dataset_name}\n") 166 | # Final save 167 | if 
def create_labels(examples, config, tokenizer):
    r''' Add a `labels` entry to a batch of an already tokenized HF dataset so that only the
    AI generated parts (answers) are trained on.

    Every sequence is walked left to right. Tokens are ignored until the AI special token is
    seen; from that token on (the AI token itself included) the token ids are copied into the
    labels, until the user special token switches ignoring back on (the user token itself is
    ignored as well).

    Args:
        examples: batch mapping that has an `input_ids` entry holding token id sequences
        config: provides `special_tokens.user`, `special_tokens.ai` and `train.ignore_index`
        tokenizer: its `vocab` mapping is used to look up the ids of the two special tokens

    Returns:
        The same `examples` object, with a `labels` entry holding one label list per sequence.
    '''
    user_token_id = tokenizer.vocab[config.special_tokens.user]
    ai_token_id = tokenizer.vocab[config.special_tokens.ai]
    # Looked up once, it does not change while we iterate
    ignore_index = config.train.ignore_index

    all_labels = []
    for input_ids in examples['input_ids']:
        labels = []
        # Everything before the first AI token belongs to the user/context and is ignored
        ignore = True
        for tkn_id in input_ids:
            if tkn_id == user_token_id:
                ignore = True
            elif tkn_id == ai_token_id:
                ignore = False
            labels.append(ignore_index if ignore else tkn_id)
        all_labels.append(labels)

    examples['labels'] = all_labels
    return examples
def pack_examples(examples, block_size, packing_type='partial'):
    r''' Used with a prepared HF dataset, will pack/group examples. Use with care, can mess up many things
    if the input is not formatted properly (requires the <|eod|> token).

    Args:
        examples: batch mapping, every key holds a list of sequences (lists); all keys are expected
            to hold the same number of sequences with matching lengths
        block_size: maximum length of a packed sequence
        packing_type: partial/full/no
            - partial: every example is first trimmed to `block_size`, consecutive examples are then
              concatenated for as long as the result fits into `block_size`. An example is never
              split across two blocks.
            - full: all examples are concatenated and cut into chunks of exactly `block_size`,
              the remainder that does not fill a whole block is dropped.
            - anything else: the input is returned untouched.

    Returns:
        A mapping with the same keys as `examples`, holding the packed sequences.
    '''
    from itertools import chain

    keys = list(examples.keys())
    if packing_type == 'partial':
        result = {k: [] for k in keys}
        if keys:
            _key = keys[0] # Take whichever key, it is only used to measure lengths
            new_example = {k: [] for k in keys}

            for ind in range(len(examples[_key])):
                # Trim long sequences to block_size, this is required for partial packing
                example = {k: examples[k][ind][0:block_size] for k in keys}
                if len(new_example[_key]) + len(example[_key]) > block_size:
                    # The current block is full, store it and start a new one with this example
                    for k in keys:
                        result[k].append(new_example[k])
                    new_example = example
                else:
                    new_example = {k: new_example[k] + example[k] for k in keys}
            # Add the last example if there is something to add
            if len(new_example[_key]) > 0:
                for k in keys:
                    result[k].append(new_example[k])
    elif packing_type == 'full':
        # Full packing, concatenate everything in linear time
        concatenated_examples = {k: list(chain.from_iterable(examples[k])) for k in keys}
        total_length = len(concatenated_examples[keys[0]]) if keys else 0
        # Drop the small remainder so that every chunk is exactly block_size long
        total_length = (total_length // block_size) * block_size
        # Split by chunks of block_size.
        result = {
            k: [t[i : i + block_size] for i in range(0, total_length, block_size)]
            for k, t in concatenated_examples.items()
        }
    else:
        # Do nothing
        result = examples
    return result
def add_tokens_to_model_and_tokenizer(config, tokenizer, model):
    r''' Add the special tokens from the config to the tokenizer, resize the model embeddings to
    match and initialise the embeddings of the new tokens with the mean of the existing ones.

    The model is resized *before* the embedding tensors are fetched: resizing can replace the
    embedding tensors, so writing into tensors fetched earlier would leave the new rows with
    their default initialisation and clobber rows of existing tokens instead.

    Args:
        config: provides `special_tokens` (a mapping that also allows attribute access, with at
            least `eos` and `pad`)
        tokenizer: tokenizer to extend, modified in place
        model: model to resize, modified in place

    Raises:
        AssertionError: if the input embedding size does not match the tokenizer size at the end.
    '''
    ntkns = tokenizer.add_tokens(list(config.special_tokens.values()))
    logging.warning(f"Added: {ntkns} tokens to the tokenizer")
    if ntkns > 0:
        model.resize_token_embeddings(len(tokenizer))

        # The new tokens are the last `ntkns` rows, the mean is taken over the old rows only
        input_embeddings = model.get_input_embeddings().weight.data
        input_embeddings_avg = input_embeddings[:-ntkns].mean(dim=0, keepdim=True)

        output_layer = model.get_output_embeddings()
        output_embeddings = None
        if output_layer is not None:
            # Not every model has an output embedding layer
            output_embeddings = output_layer.weight.data
            output_embeddings_avg = output_embeddings[:-ntkns].mean(dim=0, keepdim=True)

        input_embeddings[-ntkns:] = input_embeddings_avg
        if output_embeddings is not None:
            output_embeddings[-ntkns:] = output_embeddings_avg

    # Set the eos and pad tokens properly
    tokenizer.add_special_tokens({"eos_token": config.special_tokens.eos, "pad_token": config.special_tokens.pad})
    model.config.eos_token_id = tokenizer.eos_token_id

    assert model.get_input_embeddings().num_embeddings == len(tokenizer)
Every parser will receive: 4 | - data: the new data output from a Teacher model 5 | - prepared_data: the dataset we are creating, in other words old data that was output by a parser 6 | - prompt_config: the prompt_config for the current prompt as a dictionary (taken from the .yaml file) 7 | - config: general config, i.e. the whole .yaml file as a python-box (can be used as a dictionary) 8 | - row: the row from the original CSV that was used for context to generate the `data`, can be empty given the use-case 9 | - raw_data_id: the ID of the `data` in the raw_data CSV (used to store the raw output from OpenAI) 10 | - prompt_text: the prepared prompt that was used to generate `data` 11 | 12 | If we are running the parser for the first time the `prepared_data` will be empty (None) and it is up to us to define how that prepared_data (e.g. CSV) should look. Every parser can have different columns depending on the use-case. 13 | 14 | If the parser will output the final prepared data that will be used for model training, it should append special tokens: config.special_tokens.[user, ai, eos, eod], 15 | have a look at the functions below (e.g. csv_qa_parser).
def csv_qa_parser(data, prepared_data, prompt_config, config, row, raw_data_id, prompt_text):
    r''' Expects data in the CSV format, with the separator `;`, the dataframe has to have two columns: `Question`, `Answer`

    Every complete Question/Answer pair becomes one independent training example of the form
    `<user> question <eos> <ai> answer[references] <eos> <eod>`. Pairs with a missing or empty
    question/answer are skipped, so one broken line does not discard the rest of the output.

    Args:
        data: the CSV text output by the Teacher
        prepared_data: previously prepared data (columns `text`, `raw_data_id`) or None on the first run
        prompt_config: if it has `reference_column_to_append`, the value of that column in `row`
            is appended to every answer as a reference
        config: provides `special_tokens.[user, ai, eos, eod]`
        row: the row from the original CSV used as context, can be None
        raw_data_id: the ID of `data` in the raw_data CSV
        prompt_text: the prompt that was used to generate `data` (not used by this parser)

    Returns:
        `prepared_data` with the new examples appended.

    Raises:
        KeyError: if the CSV does not have the `Question` and `Answer` columns.
    '''
    st = config.special_tokens
    # dtype=str keeps answers that look like numbers as text
    df = pd.read_csv(StringIO(data), sep=';', dtype=str)
    # The header often comes back as `Question; Answer`, strip the column names too
    df.columns = [str(col).strip() for col in df.columns]

    to_append = ''
    ref_col = prompt_config.get('reference_column_to_append', None)
    if ref_col and row is not None and ref_col in row:
        reference = row[ref_col]
        # A missing value read by pandas is NaN, which is truthy and would be appended as "nan"
        is_nan = isinstance(reference, float) and reference != reference
        if reference and not is_nan:
            # Means we want to append a reference at the end of each Answer
            to_append = f"\nReferences:\n- {reference}"

    qa_pairs = []
    for question, answer in df[['Question', 'Answer']].values:
        if not isinstance(question, str) or not isinstance(answer, str):
            continue # Missing value (NaN), the pair is incomplete
        question, answer = question.strip(), answer.strip()
        if not question or not answer:
            continue
        # Every Q/A pair is independent
        qa_pairs.append(f'{st.user} {question} {st.eos} {st.ai} {answer}{to_append} {st.eos} {st.eod}')

    new_data = pd.DataFrame([[text, raw_data_id] for text in qa_pairs], columns=['text', 'raw_data_id'])
    if prepared_data is None:
        prepared_data = new_data
    else:
        prepared_data = pd.concat([prepared_data, new_data], ignore_index=True)

    return prepared_data
instruction_text = re.compile(r'Instruction:?(.*?)Input:', re.DOTALL)
input_text = re.compile(r'Input:?(.*?)Output:?', re.DOTALL)
output_text = re.compile(r'Output:?(.*?)$', re.DOTALL)
def task_parser(data, prepared_data, prompt_config, row, config, raw_data_id, prompt_text):
    r''' This parser can be used with prompts similar to Alpaca, it expects `data` in the following format:
        Task:
        Instruction:
        Input:
        Output:

        Task:
        Instruction:
        Input:
        Output:
        .
        .
        .

    Every task that has an instruction and an output becomes one training example of the form
    `<user> instruction[\ninput] <eos> <ai> output <eos> <eod>`. The input is left out when it is
    empty or when its text is already contained in the instruction. Chunks that do not follow the
    format above (e.g. the text before the first `Task`) are skipped.

    Args:
        data: the text output by the Teacher
        prepared_data: previously prepared data (columns `text`, `raw_data_id`) or None on the first run
        prompt_config: the config of the current prompt (not used by this parser)
        row: the row from the original CSV used as context (not used by this parser)
        config: provides `special_tokens.[user, ai, eos, eod]`
        raw_data_id: the ID of `data` in the raw_data CSV
        prompt_text: the prompt that was used to generate `data` (not used by this parser)

    Returns:
        `prepared_data` with the new examples appended.
    '''
    st = config.special_tokens
    texts = []
    # \d and not 1-9, otherwise the 0 of `10. Task` ends up in the output of the previous task
    for task in re.split(r'[\d \.]*Task[:\s]*', str(data)):
        task = task.strip()
        ins_match = instruction_text.search(task)
        inp_match = input_text.search(task)
        out_match = output_text.search(task)
        if ins_match is None or inp_match is None or out_match is None:
            # Not a task, the first chunk of the split is usually empty
            continue

        ins = ins_match.group(1).strip()
        inp = inp_match.group(1).strip()
        out = out_match.group(1).strip()

        # The input is often wrapped in quotes
        if inp.startswith('"'):
            inp = inp[1:]
        if inp.endswith('"'):
            inp = inp[:-1]

        if ins and out:
            if inp and inp not in ins:
                user_text = f'{ins}\n{inp}'
            else:
                # No input, or the instruction already contains it
                user_text = ins
            texts.append(f'{st.user} {user_text} {st.eos} {st.ai} {out} {st.eos} {st.eod}')

    new_data = pd.DataFrame([[text, raw_data_id] for text in texts], columns=['text', 'raw_data_id'])
    if prepared_data is None:
        prepared_data = new_data
    else:
        prepared_data = pd.concat([prepared_data, new_data], ignore_index=True)

    return prepared_data
def medical_conversation_parser(data, prepared_data, prompt_config, config, row, raw_data_id, prompt_text):
    r''' It expects data to be in form of a conversation, like:
        Patient:
        AI-Assistant:
        Patient:
        .
        .
        .
    The actor names 'Patient' and 'AI-Assistant' have to match exactly.

    The whole conversation becomes one training example, every Patient message is written as
    `<user> message <eos>` and every AI-Assistant message as `<ai> message[references] <eos>`,
    the example ends with `<eod>`. If no message can be extracted from `data`, nothing is
    appended and `prepared_data` is returned as it was received.

    Args:
        data: the text output by the Teacher
        prepared_data: previously prepared data (columns `text`, `raw_data_id`) or None on the first run
        prompt_config: if it has `reference_column_to_append`, the value of that column in `row`
            is appended to every AI-Assistant message as a reference
        config: provides `special_tokens.[user, ai, eos, eod]`
        row: the row from the original CSV used as context, can be None
        raw_data_id: the ID of `data` in the raw_data CSV
        prompt_text: the prompt that was used to generate `data` (not used by this parser)

    Returns:
        `prepared_data` with the new conversation appended.
    '''
    st = config.special_tokens

    # Because of the capturing group the split keeps the actor labels, the text before the
    # first label is dropped
    pieces = re.split(r'\s*(Patient\s*:|AI-Assistant\s*:)\s*', data)[1:]

    to_append = ''
    ref_col = prompt_config.get('reference_column_to_append', None)
    if ref_col and row is not None and ref_col in row:
        reference = row[ref_col]
        # A missing value read by pandas is NaN, which is truthy and would be appended as "nan"
        is_nan = isinstance(reference, float) and reference != reference
        if reference and not is_nan:
            # Means we want to append a reference at the end of each Answer
            to_append = f"\nReferences:\n- {reference}"

    # Merge the extractions into one conversation
    turns = []
    actor = None
    for piece in pieces:
        piece = piece.strip()
        label = re.fullmatch(r'(Patient|AI-Assistant)\s*:?', piece)
        if label is not None:
            actor = label.group(1)
        elif actor == 'Patient':
            turns.append(f'{st.user} {piece} {st.eos}')
        elif actor == 'AI-Assistant':
            turns.append(f'{st.ai} {piece}{to_append} {st.eos}')

    if not turns:
        # Nothing was extracted, do not add an empty example to the dataset
        return prepared_data

    conversation = ' '.join(turns).strip() + f" {st.eod}"
    new_data = pd.DataFrame([[conversation, raw_data_id]], columns=['text', 'raw_data_id'])
    if prepared_data is None:
        prepared_data = new_data
    else:
        prepared_data = pd.concat([prepared_data, new_data], ignore_index=True)

    return prepared_data
def csv_ner_parser(data, prepared_data, prompt_config, config, row, raw_data_id, prompt_text):
    r''' Parses `data` as a `;` separated CSV and tags every parsed row with `raw_data_id`.

    The columns are whatever the CSV header defines, plus `raw_data_id`. The parsed rows are
    appended to `prepared_data`, or returned on their own when `prepared_data` is None.
    '''
    parsed = pd.read_csv(StringIO(data), sep=';', engine='python')
    parsed['raw_data_id'] = raw_data_id

    if prepared_data is not None:
        return pd.concat([prepared_data, parsed], ignore_index=True)
    return parsed
def add_to_prompt_database(text, description, parser, database_path, force_replace=False):
    r''' The database is a simple json file where all the prompts are saved.

    A prompt is identified by the first 10 characters of the sha256 hash of its text. A prompt
    that is already in the database is only replaced if `force_replace` is set, otherwise the
    database is left untouched. The file is only written when a prompt was added.

    Args:
        text: the text of the prompt
        description: free text description of the prompt
        parser: the name of the parser to be used with the prompt
        database_path: path to the json file, it is created if it does not exist
        force_replace: replace an existing prompt that has the same hash

    Returns:
        The list of all prompts in the database.
    '''
    if os.path.exists(database_path):
        logging.info(f"Loading db from: {database_path}")
        # The context manager closes the file even if the json is broken
        with open(database_path, 'r', encoding='utf-8') as db_file:
            db = json.load(db_file)
    else:
        db = []
    hashes = {prompt['hash'] for prompt in db}

    # Good enough for what we need
    h = hashlib.sha256(text.encode("utf-8")).hexdigest()[:10]
    if force_replace and h in hashes:
        logging.warning("Found an existing prompt with the same hash, it will be replaced with the new one.")
        # Remove the prompt with the hash as the current one
        db = [prompt for prompt in db if prompt['hash'] != h]
        hashes.discard(h)
    if h not in hashes:
        db.append({
            'hash': h,
            'text': text,
            'description': description,
            'parser': parser
        })

        with open(database_path, 'w', encoding='utf-8') as db_file:
            json.dump(db, db_file, indent=2)
        logging.warning(f"Added prompt: {h}")
    else:
        logging.warning("The prompt is already in the database. It will not be added, you can use force_replace if you really want to add it.")

    return db
import setuptools
from setuptools.command.install import install
from setuptools.command.develop import develop
from setuptools.command.egg_info import egg_info

# The README is used as the long description of the package, read it as utf-8 so that the
# result does not depend on the default encoding of the platform
with open("./README.md", "r", encoding="utf-8") as fh:
    long_description = fh.read()

setuptools.setup(
    name="opengpt",
    version="0.0.5",
    author="w-is-h",
    author_email="w.kraljevic@gmail.com",
    description="OpenGPT a framework for producing grounded domain specific LLMs, and NHS-LLM a conversational model for healthcare made using OpenGPT.",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/cogstack/opengpt",
    packages=['opengpt'],
    install_requires=[
        'datasets>=2,<3',
        'transformers>=4.2,<5',
        'tiktoken>=0.3.2',
        'pandas',
        'openai',
        'numpy',
        'tqdm',
        'python-box',
        'jsonpickle',
    ],
    classifiers=[
        "Programming Language :: Python :: 3",
        # Has to match the LICENSE file of the repository (Apache License 2.0)
        "License :: OSI Approved :: Apache Software License",
        "Operating System :: OS Independent",
    ],
)