├── README.md ├── .vscode └── settings.json ├── requirements.txt ├── qa.json ├── finetune.py ├── mcdonalds.json ├── .gitignore └── training_dataset.jsonl /README.md: -------------------------------------------------------------------------------- 1 | # openai-fine-tuning-example 2 | 3 | Model: ft:gpt-3.5-turbo-0613:personal::7qq6kNkP -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "[python]": { 3 | "editor.defaultFormatter": "ms-python.autopep8" 4 | }, 5 | "python.formatting.provider": "none" 6 | } -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | aiohttp==3.8.5 2 | aiosignal==1.3.1 3 | async-timeout==4.0.3 4 | attrs==23.1.0 5 | beautifulsoup4==4.12.2 6 | certifi==2023.7.22 7 | charset-normalizer==3.2.0 8 | frozenlist==1.4.0 9 | idna==3.4 10 | multidict==6.0.4 11 | openai==0.27.9 12 | python-dotenv==1.0.0 13 | requests==2.31.0 14 | soupsieve==2.4.1 15 | tqdm==4.66.1 16 | urllib3==2.0.4 17 | yarl==1.9.2 18 | -------------------------------------------------------------------------------- /qa.json: -------------------------------------------------------------------------------- 1 | { 2 | "qa": [ 3 | { 4 | "question": "How do I check stock?", 5 | "answer": "You can check out how to check stock in store and online here!" 6 | }, 7 | { 8 | "question": "Can I return items?", 9 | "answer": "If you’re not totally satisfied, we have several different ways you can return products to us." 10 | }, 11 | { 12 | "question": "Can I track my delivery?", 13 | "answer": "Yes! We offer order tracking online for you to keep track of your items while they are on a journey to their forever homes." 
14 | }, 15 | { 16 | "question": "Can I bring my pet into an IKEA store?", 17 | "answer": "Registered service animals, such as guide dogs and hearing assistance dogs, are always welcome!" 18 | }, 19 | { 20 | "question": "Do you provide extra or replacement parts?", 21 | "answer": "Yes, you can order spare parts online and get them delivered directly to your door." 22 | }, 23 | { 24 | "question": "Can I borrow a wheelchair in your stores?", 25 | "answer": "Yes, you can find them inside each store entrance area!" 26 | }, 27 | { 28 | "question": "Can I use IKEA Gift Cards or e-vouchers in other countries?", 29 | "answer": "You can use IKEA gift cards or IKEA e-vouchers in other countries, but there are limitations." 30 | }, 31 | { 32 | "question": "Can I cancel my online order before delivery?", 33 | "answer": "You can manage, track and cancel the order you have placed online using your order number and email address." 34 | }, 35 | { 36 | "question": "I missed my collection timeslot, what can I do?", 37 | "answer": "If you are unable to collect your order, do not fear." 38 | }, 39 | { 40 | "question": "Can I add items onto my order after purchase?", 41 | "answer": "You cannot add any products onto your order after it has been placed." 42 | }, 43 | { 44 | "question": "Does IKEA offer an NHS / Key Worker Discount?", 45 | "answer": "At present IKEA does not offer a discount to NHS staff or key workers..." 
# --- repository dump: end of /qa.json (kept verbatim from the original line) ---
# 46 | } 47 | ] 48 | }
# -------------------------------------------------------------------------------- /finetune.py: --------------------------------------------------------------------------------
"""Command-line helper for fine-tuning an OpenAI chat model on Q&A pairs.

Operations (first command-line argument):

    generate_training_dataset <json_input_path> <system_message>
    upload_training_dataset   <output_jsonl_path>
    create_finetuning_job     <uploaded_file_id>
    retrieve_finetuning_job   <job_id>
    ask                       <model> <system_message>

The OpenAI API key is read from the ``OPENAI_API_KEY`` environment variable
(optionally loaded from a ``.env`` file) when this module is imported.
"""
import json
import os
import sys

import openai
from dotenv import load_dotenv

load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")


def generate_training_dataset(json_input_path, system_message,
                              output_jsonl_path="training_dataset.jsonl"):
    """Convert a Q&A JSON file into a chat-format JSONL training file.

    Args:
        json_input_path: Path to a JSON file whose top-level ``"qa"`` key
            holds a list of objects with ``"question"`` and ``"answer"`` keys.
        system_message: System prompt placed at the start of every example.
        output_jsonl_path: Where to write the JSONL file. Defaults to
            ``training_dataset.jsonl`` in the current directory.

    Returns:
        The path of the JSONL file that was written.

    Raises:
        KeyError: If the input lacks ``"qa"`` or a pair lacks
            ``"question"`` / ``"answer"``.
    """
    # Explicit UTF-8: the Q&A data contains non-ASCII characters, and the
    # platform default encoding is not UTF-8 everywhere.
    with open(json_input_path, "r", encoding="utf-8") as input_file:
        input_json = json.load(input_file)

    # One JSON document per line; each is a system/user/assistant exchange.
    output_jsonl_strs = [
        json.dumps({
            "messages": [
                {"role": "system", "content": system_message},
                {"role": "user", "content": qa_pair["question"]},
                {"role": "assistant", "content": qa_pair["answer"]},
            ]
        })
        for qa_pair in input_json["qa"]
    ]

    with open(output_jsonl_path, "w", encoding="utf-8") as jsonl_file:
        jsonl_file.write("\n".join(output_jsonl_strs))

    return output_jsonl_path


def upload_training_dataset(output_jsonl_path):
    """Upload a JSONL training file to OpenAI for fine-tuning.

    Args:
        output_jsonl_path: Path of the JSONL file to upload.

    Returns:
        The ``"id"`` field of the upload response.
    """
    # The context manager closes the handle even if the upload raises.
    with open(output_jsonl_path, "rb") as training_file:
        response = openai.File.create(file=training_file, purpose='fine-tune')
    print(response)

    uploaded_file_id = response["id"]
    print(f"Uploaded file ID: {uploaded_file_id}")
    return uploaded_file_id


def create_finetuning_job(uploaded_file_id, model="gpt-3.5-turbo"):
    """Start a fine-tuning job on a previously uploaded training file.

    Args:
        uploaded_file_id: File ID returned by ``upload_training_dataset``.
        model: Base model to fine-tune. Defaults to ``gpt-3.5-turbo``.

    Returns:
        The ``"id"`` field of the job-creation response.
    """
    response = openai.FineTuningJob.create(training_file=uploaded_file_id, model=model)
    job_id = response["id"]
    print(response)
    print(f"Job ID: {job_id}")
    return job_id


def retrieve_finetuning_job(job_id):
    """Fetch a fine-tuning job and print its full response and status.

    Args:
        job_id: Job ID returned by ``create_finetuning_job``.

    Returns:
        The response object from ``openai.FineTuningJob.retrieve``.
    """
    response = openai.FineTuningJob.retrieve(job_id)
    print(response)
    print(f"Job {job_id} status: {response['status']}")
    return response


def ask(model, system_message, prompt):
    """Send one question to a chat model and print the reply message.

    Args:
        model: Model name to query (e.g. a fine-tuned model ID).
        system_message: System prompt sent before the user message.
        prompt: The user's question.

    Returns:
        The message of the first completion choice.
    """
    completion = openai.ChatCompletion.create(
        model=model,
        messages=[
            {"role": "system", "content": system_message},
            {"role": "user", "content": prompt},
        ],
    )
    message = completion.choices[0].message
    print(message)
    return message


def _ask_interactively(model, system_message):
    """Prompt for questions in a loop until the user sends EOF or Ctrl-C."""
    while True:
        try:
            user_input = input("Please enter your question: ")
        except (EOFError, KeyboardInterrupt):
            # Leave the loop quietly instead of dumping a traceback.
            print()
            return
        if not user_input.strip():
            # A blank line would only spend an API call on an empty question.
            continue
        ask(model, system_message, user_input)


# Operation name -> (handler, names of its required positional arguments).
_OPERATIONS = {
    "generate_training_dataset": (
        generate_training_dataset, ("<json_input_path>", "<system_message>")),
    "upload_training_dataset": (upload_training_dataset, ("<output_jsonl_path>",)),
    "create_finetuning_job": (create_finetuning_job, ("<uploaded_file_id>",)),
    "retrieve_finetuning_job": (retrieve_finetuning_job, ("<job_id>",)),
    "ask": (_ask_interactively, ("<model>", "<system_message>")),
}


def main(argv):
    """Run the operation named in ``argv[1]`` with the arguments after it.

    Args:
        argv: Full argument vector, program name first (as in ``sys.argv``).

    Returns:
        Process exit status: 0 on success, 1 on a usage error.
    """
    if len(argv) < 2:
        print("Please specify an operation")
        return 1

    operation = argv[1]
    if operation not in _OPERATIONS:
        print("Invalid operation")
        return 1

    handler, params = _OPERATIONS[operation]
    args = argv[2:]
    if len(args) < len(params):
        # Report missing arguments as a usage error, not an IndexError.
        print(f"Usage: {argv[0]} {operation} {' '.join(params)}")
        return 1

    # Surplus arguments are ignored, as they always have been.
    handler(*args[:len(params)])
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))

# --- repository dump: start of /mcdonalds.json (kept verbatim from the original line) ---
# -------------------------------------------------------------------------------- /mcdonalds.json: --------------------------------------------------------------------------------
# 1 | { 2 | "qa": [ 3 | { 4 | "question": "What's the salt content in a Happy Meal®?", 5 | "answer": "You can find a full ingredient declaration for all food (including our Happy Meal® products) served in the U.K. by visiting our online nutritional calculator tool here or downloading our Allergen Booklet here." 6 | }, 7 | { 8 | "question": "Where does your curry sauce come from?", 9 | "answer": "Our Sweet Curry Dip is made by our sauce supplier who is based in Lancashire." 10 | }, 11 | { 12 | "question": "What's the sugar content of your products?", 13 | "answer": "You can find a full ingredient declaration for all food (including our sugar content) served in the U.K. by visiting our online nutritional calculator tool here or downloading our Allergen Booklet here."
14 | }, 15 | { 16 | "question": "What time does the breakfast service start in 24-hour restaurants?", 17 | "answer": "In a 24-hour McDonald's restaurant, our breakfast menu is served from 5am." 18 | }, 19 | { 20 | "question": "Can you give me some nutritional information about the Quaker Oat So Simple porridge?", 21 | "answer": "You can find a full ingredient declaration for all food served (including our Quaker Oat So Simple porridge) in the U.K. by visiting our online nutritional calculator tool here or downloading our Allergen Booklet here." 22 | }, 23 | { 24 | "question": "What's your Big Mac® packaging made from?", 25 | "answer": "Our Big Mac packaging is made from carton board with the middle layer from recycled sources (around 70%) and the outside layer from controlled wood forestry sources. " 26 | }, 27 | { 28 | "question": "What meat are you sausages made from?", 29 | "answer": "Our sausage patties are made with British RSPCA Assured pork. " 30 | }, 31 | { 32 | "question": "Is your porridge suitable for vegans?", 33 | "answer": "Our Oatso Simple Plain porridge is normally made with milk however it will be suitable for vegans if it's made with water instead of milk. If you let your server know when placing your order they will be happy to arrange this for you." 34 | }, 35 | { 36 | "question": "Do your sauces contain pork fat or LM10?", 37 | "answer": "No, the sauces used in McDonald's in the U.K. do not contain pork fat or LM10.\nYou can find a full ingredient declaration for all food served in the U.K. by visiting our online nutritional calculator tool here or downloading our Allergen Booklet here." 38 | }, 39 | { 40 | "question": "Do you offer discounts with both standard and NUS student cards?", 41 | "answer": "Yes, we accept all valid Student/NUS Cards with a photograph, valid end date and the name of the University, Sixth Form or College. We also accept UNiDAYS® Digital ID. 
" 42 | }, 43 | { 44 | "question": "Where do you run your football initiatives?", 45 | "answer": "Our free, Fun Football coaching sessions for children of all abilities aged 5-11 are held in hundreds of locations across the UK, taking place in the Spring, Summer and Autumn. To find a local session and register, click here https://www.mcdonalds.com/gb/en-gb/football/fun-football-centres.html. " 46 | }, 47 | { 48 | "question": "Do you offer student discounts on breakfast meals?", 49 | "answer": "No, sorry, we don't have any nationwide student offers on our breakfast menu items." 50 | } 51 | ] 52 | } -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | #.idea/ 161 | -------------------------------------------------------------------------------- /training_dataset.jsonl: -------------------------------------------------------------------------------- 1 | {"messages": [{"role": "system", "content": "You are the McDonalds customer service chatbot."}, {"role": "user", "content": "What's the salt content in a Happy Meal\u00ae?"}, {"role": "assistant", "content": "You can find a full ingredient declaration for all food (including our Happy Meal\u00ae products) served in the U.K. 
by visiting our online nutritional calculator tool here or downloading our Allergen Booklet here."}]} 2 | {"messages": [{"role": "system", "content": "You are the McDonalds customer service chatbot."}, {"role": "user", "content": "Where does your curry sauce come from?"}, {"role": "assistant", "content": "Our Sweet Curry Dip is made by our sauce supplier who is based in Lancashire."}]} 3 | {"messages": [{"role": "system", "content": "You are the McDonalds customer service chatbot."}, {"role": "user", "content": "What's the sugar content of your products?"}, {"role": "assistant", "content": "You can find a full ingredient declaration for all food (including our sugar content) served in the U.K. by visiting our online nutritional calculator tool here or downloading our Allergen Booklet here."}]} 4 | {"messages": [{"role": "system", "content": "You are the McDonalds customer service chatbot."}, {"role": "user", "content": "What time does the breakfast service start in 24-hour restaurants?"}, {"role": "assistant", "content": "In a 24-hour McDonald's restaurant, our breakfast menu is served from 5am."}]} 5 | {"messages": [{"role": "system", "content": "You are the McDonalds customer service chatbot."}, {"role": "user", "content": "Can you give me some nutritional information about the Quaker Oat So Simple porridge?"}, {"role": "assistant", "content": "You can find a full ingredient declaration for all food served (including our Quaker Oat So Simple porridge) in the U.K. by visiting our online nutritional calculator tool here or downloading our Allergen Booklet here."}]} 6 | {"messages": [{"role": "system", "content": "You are the McDonalds customer service chatbot."}, {"role": "user", "content": "What's your Big Mac\u00ae packaging made from?"}, {"role": "assistant", "content": "Our Big Mac packaging is made from carton board with the middle layer from recycled sources (around 70%) and the outside layer from controlled wood forestry sources. 
"}]} 7 | {"messages": [{"role": "system", "content": "You are the McDonalds customer service chatbot."}, {"role": "user", "content": "What meat are you sausages made from?"}, {"role": "assistant", "content": "Our sausage patties are made with British RSPCA Assured pork. "}]} 8 | {"messages": [{"role": "system", "content": "You are the McDonalds customer service chatbot."}, {"role": "user", "content": "Is your porridge suitable for vegans?"}, {"role": "assistant", "content": "Our Oatso Simple Plain porridge is normally made with milk however it will be suitable for vegans if it's made with water instead of milk. If you let your server know when placing your order they will be happy to arrange this for you."}]} 9 | {"messages": [{"role": "system", "content": "You are the McDonalds customer service chatbot."}, {"role": "user", "content": "Do your sauces contain pork fat or LM10?"}, {"role": "assistant", "content": "No, the sauces used in McDonald's in the U.K. do not contain pork fat or LM10.\nYou can find a full ingredient declaration for all food served in the U.K. by visiting our online nutritional calculator tool here or downloading our Allergen Booklet here."}]} 10 | {"messages": [{"role": "system", "content": "You are the McDonalds customer service chatbot."}, {"role": "user", "content": "Do you offer discounts with both standard and NUS student cards?"}, {"role": "assistant", "content": "Yes, we accept all valid Student/NUS Cards with a photograph, valid end date and the name of the University, Sixth Form or College. We also accept UNiDAYS\u00ae Digital ID. "}]} 11 | {"messages": [{"role": "system", "content": "You are the McDonalds customer service chatbot."}, {"role": "user", "content": "Where do you run your football initiatives?"}, {"role": "assistant", "content": "Our free, Fun Football coaching sessions for children of all abilities aged 5-11 are held in hundreds of locations across the UK, taking place in the Spring, Summer and Autumn. 
To find a local session and register, click here https://www.mcdonalds.com/gb/en-gb/football/fun-football-centres.html. "}]} 12 | {"messages": [{"role": "system", "content": "You are the McDonalds customer service chatbot."}, {"role": "user", "content": "Do you offer student discounts on breakfast meals?"}, {"role": "assistant", "content": "No, sorry, we don't have any nationwide student offers on our breakfast menu items."}]} --------------------------------------------------------------------------------