├── requirements.txt ├── .DS_Store ├── .gitattributes ├── handwrittensample.png ├── README.md ├── InvoiceExtract.py ├── InvoiceExtract_LocalFile.py ├── invoice_schema.json └── .gitignore /requirements.txt: -------------------------------------------------------------------------------- 1 | openai>=1.2.0 -------------------------------------------------------------------------------- /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Unleashed/GPT4o_Vision/HEAD/.DS_Store -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /handwrittensample.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI-Unleashed/GPT4o_Vision/HEAD/handwrittensample.png -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GPT4o_Vision 2 | 3 | Starter code that uses the new GPT-4o vision API to extract text from an image. It includes one script that works from an internet-accessible image URL (InvoiceExtract.py) and one that works from a local copy of an image (InvoiceExtract_LocalFile.py). 4 | Step 1: Make sure the OpenAI API key environment variable (OPENAI_API_KEY) is set up in your environment 5 | 6 | - Steps for Mac: https://www.patreon.com/posts/setting-up-your-90435028 7 | - Steps for Windows: https://www.patreon.com/posts/securely-setting-90627687 8 | 9 | Step 2: Install the prerequisite libraries (just OpenAI for this starter code) 10 | 11 | - pip install -r requirements.txt 12 | 13 | Step 3: Run it! 
"""InvoiceExtract.py: turn an invoice image at a URL into a JSON file.

The image at ``IMAGE_URL`` and the JSON Schema stored in ``SCHEMA_FILE`` are
sent to the chat-completions endpoint in JSON mode.  The reply is printed and
then written to ``<image file name>.json`` in the current working directory.
"""
import json
import os
from urllib.parse import urlparse

IMAGE_URL = 'https://www.invoicesimple.com/wp-content/uploads/2018/06/Sample-Invoice-printable.png'
SCHEMA_FILE = 'invoice_schema.json'
MODEL = 'gpt-4o'
MAX_TOKENS = 500
PROMPT = "provide JSON file that represents this document. Use this JSON Schema: "


def load_json_schema(schema_file: str) -> dict:
    """Read the JSON document stored at *schema_file* and return it as a dict."""
    # Explicit encoding: JSON is UTF-8 regardless of the platform default.
    with open(schema_file, 'r', encoding='utf-8') as file:
        return json.load(file)


def build_messages(schema: dict, image_url: str) -> list:
    """Return the single user message carrying the prompt, schema and image.

    Args:
        schema: JSON Schema the model is asked to follow; embedded as text.
        image_url: Address of the image, passed as an ``image_url`` part.
    """
    return [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": PROMPT + json.dumps(schema)},
                {"type": "image_url", "image_url": {"url": image_url}},
            ],
        }
    ]


def json_filename_for(image_url: str) -> str:
    """Derive the output file name from the last path segment of *image_url*.

    Query strings and fragments are ignored because only the URL path is
    used.  When the path has no file name (for example it ends in ``/``) the
    stem falls back to ``invoice`` instead of producing a bare ``.json``.
    """
    basename = os.path.basename(urlparse(image_url).path)
    stem = os.path.splitext(basename)[0]
    return f"{stem or 'invoice'}.json"


def parse_reply(content, finish_reason=None) -> dict:
    """Decode the model reply into Python data.

    Args:
        content: Text of the assistant message (may be ``None``).
        finish_reason: The choice's ``finish_reason``; ``"length"`` means the
            reply was cut off at the token limit and cannot be valid JSON.

    Raises:
        ValueError: If the reply was truncated, is empty, or is not valid
            JSON (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    if finish_reason == "length":
        raise ValueError(
            f"model reply was truncated at max_tokens={MAX_TOKENS}; "
            "raise MAX_TOKENS and retry"
        )
    if not content:
        raise ValueError("model returned an empty reply")
    return json.loads(content)


def main() -> None:
    """Run the extraction for ``IMAGE_URL`` and save the result to disk."""
    # Imported here so the helpers above stay importable without the SDK
    # installed and without creating a client as an import side effect.
    from openai import OpenAI

    client = OpenAI()
    invoice_schema = load_json_schema(SCHEMA_FILE)
    response = client.chat.completions.create(
        model=MODEL,
        response_format={"type": "json_object"},
        messages=build_messages(invoice_schema, IMAGE_URL),
        max_tokens=MAX_TOKENS,
    )
    choice = response.choices[0]
    print(choice.message.content)
    json_data = parse_reply(choice.message.content, choice.finish_reason)

    json_filename = json_filename_for(IMAGE_URL)
    with open(json_filename, 'w', encoding='utf-8') as file:
        json.dump(json_data, file, indent=4)

    print(f"JSON data saved to {json_filename}")


if __name__ == "__main__":
    main()
"""InvoiceExtract_LocalFile.py: turn a local invoice image into a JSON file.

The image at ``IMAGE_PATH`` is base64-encoded into a ``data:`` URL and sent,
together with the JSON Schema stored in ``SCHEMA_FILE``, to the
chat-completions endpoint in JSON mode.  The reply is printed and then
written to ``<image file name>.json`` in the current working directory.
"""
import base64
import json
import mimetypes
import os

# Use the local file 'handwrittensample.png'
IMAGE_PATH = 'handwrittensample.png'
SCHEMA_FILE = 'invoice_schema.json'
MODEL = 'gpt-4o'
MAX_TOKENS = 500
PROMPT = "provide JSON file that represents this document. Use this JSON Schema: "
# Used when the file extension does not map to an image type; this is the
# media type the script previously sent for every file.
FALLBACK_MIME_TYPE = 'image/jpeg'


def load_json_schema(schema_file: str) -> dict:
    """Read the JSON document stored at *schema_file* and return it as a dict."""
    # Explicit encoding: JSON is UTF-8 regardless of the platform default.
    with open(schema_file, 'r', encoding='utf-8') as file:
        return json.load(file)


def image_to_data_url(image_path: str) -> str:
    """Return the file at *image_path* as a base64 ``data:`` URL.

    The media type is guessed from the file extension so that a PNG is
    labelled ``image/png`` rather than ``image/jpeg``.  Extensions that do
    not map to an ``image/*`` type use ``FALLBACK_MIME_TYPE``.
    """
    mime_type, _ = mimetypes.guess_type(image_path)
    if mime_type is None or not mime_type.startswith('image/'):
        mime_type = FALLBACK_MIME_TYPE
    # Open the local image file in binary mode
    with open(image_path, 'rb') as image_file:
        image_base64 = base64.b64encode(image_file.read()).decode('utf-8')
    return f"data:{mime_type};base64,{image_base64}"


def build_messages(schema: dict, image_url: str) -> list:
    """Return the single user message carrying the prompt, schema and image.

    Args:
        schema: JSON Schema the model is asked to follow; embedded as text.
        image_url: URL (here a ``data:`` URL) passed as an ``image_url`` part.
    """
    return [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": PROMPT + json.dumps(schema)},
                {"type": "image_url", "image_url": {"url": image_url}},
            ],
        }
    ]


def json_filename_for(image_path: str) -> str:
    """Return ``<stem>.json`` where *stem* is the image's base name without extension."""
    stem = os.path.splitext(os.path.basename(image_path))[0]
    return f"{stem}.json"


def parse_reply(content, finish_reason=None) -> dict:
    """Decode the model reply into Python data.

    Args:
        content: Text of the assistant message (may be ``None``).
        finish_reason: The choice's ``finish_reason``; ``"length"`` means the
            reply was cut off at the token limit and cannot be valid JSON.

    Raises:
        ValueError: If the reply was truncated, is empty, or is not valid
            JSON (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    if finish_reason == "length":
        raise ValueError(
            f"model reply was truncated at max_tokens={MAX_TOKENS}; "
            "raise MAX_TOKENS and retry"
        )
    if not content:
        raise ValueError("model returned an empty reply")
    return json.loads(content)


def main() -> None:
    """Run the extraction for ``IMAGE_PATH`` and save the result to disk."""
    # Imported here so the helpers above stay importable without the SDK
    # installed and without creating a client as an import side effect.
    from openai import OpenAI

    client = OpenAI()
    # Load the JSON schema
    invoice_schema = load_json_schema(SCHEMA_FILE)
    response = client.chat.completions.create(
        model=MODEL,
        response_format={"type": "json_object"},
        messages=build_messages(invoice_schema, image_to_data_url(IMAGE_PATH)),
        max_tokens=MAX_TOKENS,
    )
    choice = response.choices[0]
    print(choice.message.content)
    json_data = parse_reply(choice.message.content, choice.finish_reason)

    json_filename = json_filename_for(IMAGE_PATH)
    with open(json_filename, 'w', encoding='utf-8') as file:
        json.dump(json_data, file, indent=4)

    print(f"JSON data saved to {json_filename}")


if __name__ == "__main__":
    main()
"from": { 22 | "description": "Information about the issuer of the invoice.", 23 | "type": "object", 24 | "properties": { 25 | "name": { "type": "string" }, 26 | "address": { "type": "string" } 27 | }, 28 | "required": ["name", "address"] 29 | }, 30 | "to": { 31 | "description": "Information about the recipient of the invoice.", 32 | "type": "object", 33 | "properties": { 34 | "name": { "type": "string" }, 35 | "address": { "type": "string" } 36 | }, 37 | "required": ["name", "address"] 38 | }, 39 | "items": { 40 | "type": "array", 41 | "description": "List of items included in the invoice.", 42 | "items": { 43 | "type": "object", 44 | "properties": { 45 | "description": { "type": "string" }, 46 | "quantity": { "type": "number" }, 47 | "price": { "type": "number" } 48 | }, 49 | "required": ["description", "quantity", "price"] 50 | } 51 | }, 52 | "total": { 53 | "description": "Total amount of the invoice.", 54 | "type": "number" 55 | } 56 | }, 57 | "required": ["invoiceNumber", "dateIssued", "from", "to", "items", "total"] 58 | } 59 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 105 | __pypackages__/ 106 | 107 | # Celery stuff 108 | celerybeat-schedule 109 | celerybeat.pid 110 | 111 | # SageMath parsed files 112 | *.sage.py 113 | 114 | # Environments 115 | .env 116 | .venv 117 | env/ 118 | venv/ 119 | ENV/ 120 | env.bak/ 121 | venv.bak/ 122 | 123 | # Spyder project settings 124 | .spyderproject 125 | .spyproject 126 | 127 | # Rope project settings 128 | .ropeproject 129 | 130 | # mkdocs documentation 131 | /site 132 | 133 | # mypy 134 | .mypy_cache/ 135 | .dmypy.json 136 | dmypy.json 137 | 138 | # Pyre type checker 139 | .pyre/ 140 | 141 | # pytype static type analyzer 142 | .pytype/ 143 | 144 | # Cython debug symbols 145 | cython_debug/ 146 | 147 | # PyCharm 148 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 149 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 150 | # and can be added to the global gitignore or merged into this file. For a more nuclear 151 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 152 | #.idea/ 153 | --------------------------------------------------------------------------------