├── .gitattributes ├── requirements.txt ├── utils ├── ocr.py ├── __init__.py ├── notion.py ├── llm.py └── obsidian.py ├── README.md ├── LICENSE ├── main.py └── .gitignore /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | streamlit 2 | pandas 3 | streamlit-shadcn-ui>=0.1.18 4 | streamlit_extras 5 | pygwalker>=0.4.8.3 -------------------------------------------------------------------------------- /utils/ocr.py: -------------------------------------------------------------------------------- 1 | import easyocr 2 | def ocr(image_path: str): 3 | reader = easyocr.Reader(['ch_sim','en']) # this needs to run only once to load the model into memory 4 | result = reader.readtext(image_path, detail=0) 5 | return result -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | from io import BytesIO 2 | import os 3 | import base64 4 | 5 | def image_to_base64(image): 6 | buffered = BytesIO() 7 | image.save(buffered, format="PNG") 8 | img_str = base64.b64encode(buffered.getvalue()).decode('utf-8') 9 | return img_str 10 | 11 | def get_markdown_files_in_path(dir: str): 12 | files = [] 13 | for file in os.listdir(dir): 14 | if file.endswith(".md"): 15 | files.append(file) 16 | return files -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Note GPT 2 | Easily transform your images into written notes. 3 | 4 | https://github.com/ObservedObserver/photes-lite/assets/22167673/1c7a1fb0-5c00-4960-9f80-8defe34329db 5 | 6 | This app is initial built by [lab2.dev](https://lab2.dev), the AI App Builder for creating streamlit app. 7 | 8 | ## Overview 9 | 10 | This project provides a Streamlit app that converts images into written notes. Currently, the app can only be run locally, as it writes directly to the file system. 11 | 12 | ## Getting Started 13 | 14 | To use Image-to-Note, follow these steps: 15 | 16 | ### 1. Setup Environment 17 | Create a `.streamlit/secrets.toml` file with the following content: 18 | ```toml 19 | [openai] 20 | api_key = "your_openai_api_key" 21 | base_url = "optional_openai_base_url" # leave blank if not using 22 | ``` 23 | 24 | ### 2. Run Streamlit App 25 | Execute the following command to run the app: 26 | ```bash 27 | streamlit run main.py --server.enableXsrfProtection false 28 | ``` 29 | 30 | ## What's Coming 31 | For a more production-level tool, check Pixno [Pixno (photes.io)](https://photes.io) 32 | 33 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 observedobserver 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /utils/notion.py: -------------------------------------------------------------------------------- 1 | def create_notion_page(token, database_id, content): 2 | url = "https://api.notion.com/v1/pages" 3 | headers = { 4 | "Authorization": f"Bearer {token}", 5 | "Content-Type": "application/json", 6 | "Notion-Version": "2022-06-28" 7 | } 8 | 9 | # Create a new page with the markdown content 10 | data = { 11 | "parent": { "database_id": database_id }, 12 | "properties": { 13 | "title": [ 14 | { 15 | "text": { 16 | "content": "Markdown Content" 17 | } 18 | } 19 | ] 20 | }, 21 | "children": [ 22 | # { 23 | # "object": "block", 24 | # "type": "image", 25 | # "image": { 26 | # "type": "external", 27 | # "external": { 28 | # "url": image_url 29 | # } 30 | # } 31 | # }, 32 | { 33 | "object": "block", 34 | "type": "paragraph", 35 | "paragraph": { 36 | "rich_text": [ 37 | { 38 | "type": "text", 39 | "text": { 40 | "content": content 41 | } 42 | } 43 | ] 44 | } 45 | } 46 | ] 47 | } 48 | 49 | response = requests.post(url, headers=headers, data=json.dumps(data)) 50 | 51 | if response.status_code == 200: 52 | print("Page created successfully!") 53 | else: 54 | print(f"Failed to create page: {response.status_code} - {response.text}") 55 | -------------------------------------------------------------------------------- /utils/llm.py: -------------------------------------------------------------------------------- 1 | from openai import OpenAI 2 | 3 | 4 | def generated_notes_from_images(client: OpenAI, image_base64: str, ocr_enhance_info: list[str] = None, model: str="gpt-4o"): 5 | ocr_prompt = "" 6 | if ocr_enhance_info is not None: 7 | ocr_prompt = f""" 8 | Here are some text extracted from the image with OCR program, it might help you to check the information when you are not sure. 9 | 10 | {{"\n".join(ocr_enhance_info)}} 11 | 12 | """ 13 | prompt = f"""You are a great note taker, for the uploaded images, generate notes contains import information. 14 | The images usually taken from slides, meeting notes, what you need to do is help to take notes from the images. 15 | Output the information directly, do not say anything else. 16 | Make it more like a note contains important information, not a description of the image. 17 | A good structure of generated notes is like: 18 | 19 | ## <-title-> 20 | <-a paragraph of summary-> 21 | <-details information from the image-> 22 | 23 | If there are some tables, try to extract all orignial information in table format. 24 | Use the same language as the image, do not change the language. 25 | {ocr_prompt} 26 | """ 27 | response = client.chat.completions.create( 28 | model=model, 29 | messages=[ 30 | { 31 | "role": "user", 32 | "content": [ 33 | {"type": "text", "text": prompt}, 34 | { 35 | "type": "image_url", 36 | "image_url": { 37 | "url": f"data:image/png;base64,{image_base64}" 38 | }, 39 | }, 40 | ], 41 | }, 42 | ], stream=True) 43 | notes = "" 44 | for chunk in response: 45 | if chunk.choices[0].delta.content is not None: 46 | notes = chunk.choices[0].delta.content 47 | yield notes 48 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | from PIL import Image 3 | from openai import OpenAI 4 | from utils import image_to_base64 5 | from utils.llm import generated_notes_from_images 6 | from utils.obsidian import append_to_obsidian_file, prepare_obsidian_writepath 7 | from utils.ocr import ocr 8 | 9 | api_key = st.secrets["OPENAI_API_KEY"] 10 | openai_base_url = st.secrets.get("OPENAI_BASE_URL") 11 | notion_token = st.secrets["NOTION_API_KEY"] 12 | 13 | default_vault_path = '/Users/observedobserver/Documents/obsidian-notes/elwynn-library/image-to-notes' 14 | client = OpenAI(api_key=api_key, base_url=openai_base_url) 15 | 16 | with st.sidebar: 17 | import qrcode 18 | from io import BytesIO 19 | from streamlit.web.server import server_util 20 | internal_ip = st.net_util.get_internal_ip() 21 | url = server_util.get_url(internal_ip) 22 | img = qrcode.make(url) 23 | 24 | buffer = BytesIO() 25 | img.save(buffer, format="PNG") 26 | img_bytes = buffer.getvalue() 27 | 28 | st.image(img_bytes, caption=f"scan to open {url}", use_column_width=True) 29 | obsidian_db = st.text_input('Obsidian DB', value=default_vault_path) 30 | model = st.selectbox('Model', ['gpt-4o', 'gpt-4-vision']) 31 | 32 | ocr_enhance = st.toggle('Use OCR Enhance', False) 33 | 34 | st.title('Turn your photos into notes with AI') 35 | title = st.text_input('Note Title', value='New Notes') 36 | 37 | img_files = st.file_uploader('Your photos', accept_multiple_files=True) 38 | 39 | all_done = 0 40 | if img_files is not None: 41 | for img_file in img_files: 42 | image = Image.open(img_file) 43 | # save image to local and get the path 44 | 45 | with st.spinner('Taking notes...'): 46 | base64_str = image_to_base64(image) 47 | note_path = prepare_obsidian_writepath(note_title=title, vault_path=obsidian_db, uploaded_file=img_file) 48 | ocr_result = None 49 | if ocr_enhance: 50 | image.save(f'./{img_file.name}') 51 | ocr_result = ocr(f'./{img_file.name}') 52 | notes_gen = generated_notes_from_images(client=client, image_base64=base64_str, ocr_enhance_info=ocr_result, model=model) 53 | for notes in notes_gen: 54 | append_to_obsidian_file(content=notes, file_path=note_path) 55 | st.image(image, caption='Uploaded Image', use_column_width=True) 56 | all_done += 1 57 | st.success('Done!') 58 | if all_done == len(img_files) and all_done > 0: 59 | st.balloons() 60 | 61 | 62 | 63 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 105 | __pypackages__/ 106 | 107 | # Celery stuff 108 | celerybeat-schedule 109 | celerybeat.pid 110 | 111 | # SageMath parsed files 112 | *.sage.py 113 | 114 | # Environments 115 | .env 116 | .venv 117 | env/ 118 | venv/ 119 | ENV/ 120 | env.bak/ 121 | venv.bak/ 122 | 123 | # Spyder project settings 124 | .spyderproject 125 | .spyproject 126 | 127 | # Rope project settings 128 | .ropeproject 129 | 130 | # mkdocs documentation 131 | /site 132 | 133 | # mypy 134 | .mypy_cache/ 135 | .dmypy.json 136 | dmypy.json 137 | 138 | # Pyre type checker 139 | .pyre/ 140 | 141 | # pytype static type analyzer 142 | .pytype/ 143 | 144 | # Cython debug symbols 145 | cython_debug/ 146 | 147 | # PyCharm 148 | # JetBrains specific template is maintainted in a separate JetBrains.gitignore that can 149 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 150 | # and can be added to the global gitignore or merged into this file. For a more nuclear 151 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 152 | #.idea/ 153 | 154 | .streamlit -------------------------------------------------------------------------------- /utils/obsidian.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | 4 | def add_markdown_to_obsidian(note_title, content, vault_path, uploaded_file): 5 | """ 6 | Adds Markdown content and an uploaded image to an Obsidian note. Creates a new note if it doesn't exist. 7 | 8 | :param note_title: Title of the Obsidian note. 9 | :param content: Markdown content to be added. 10 | :param vault_path: Path to the Obsidian vault. 11 | :param uploaded_file: File-like object of the uploaded image. 12 | """ 13 | import os 14 | import shutil 15 | 16 | # Ensure the vault path exists 17 | if not os.path.exists(vault_path): 18 | raise FileNotFoundError(f"The specified vault path '{vault_path}' does not exist.") 19 | 20 | # Create the full path to the note 21 | note_filename = f"{note_title}.md" 22 | note_path = os.path.join(vault_path, note_filename) 23 | 24 | # Handle the uploaded file 25 | image_filename = uploaded_file.name 26 | image_dir = os.path.join(vault_path, f'assets_{note_title}') 27 | if not os.path.exists(image_dir): 28 | os.makedirs(image_dir) 29 | target_image_path = os.path.join(image_dir, image_filename) 30 | 31 | # Save the uploaded file to the vault 32 | with open(target_image_path, 'wb') as out_file: 33 | out_file.write(uploaded_file.getbuffer()) 34 | 35 | # Create the Markdown for the image 36 | image_markdown = f"![{image_filename}]({image_filename})\n" 37 | 38 | # Append the content and image markdown to the note (create the note if it doesn't exist) 39 | with open(note_path, 'a') as note_file: 40 | if image_markdown: 41 | note_file.write(image_markdown + '\n') 42 | note_file.write(content + '\n') 43 | 44 | return note_path 45 | 46 | 47 | def prepare_obsidian_writepath(note_title, vault_path, uploaded_file): 48 | """ 49 | Adds Markdown content and an uploaded image to an Obsidian note. Creates a new note if it doesn't exist. 50 | 51 | :param note_title: Title of the Obsidian note. 52 | :param content: Markdown content to be added. 53 | :param vault_path: Path to the Obsidian vault. 54 | :param uploaded_file: File-like object of the uploaded image. 55 | """ 56 | import os 57 | import shutil 58 | 59 | # Ensure the vault path exists 60 | if not os.path.exists(vault_path): 61 | raise FileNotFoundError(f"The specified vault path '{vault_path}' does not exist.") 62 | 63 | # Create the full path to the note 64 | note_filename = f"{note_title}.md" 65 | note_path = os.path.join(vault_path, note_filename) 66 | 67 | # Handle the uploaded file 68 | image_filename = uploaded_file.name 69 | image_dir = os.path.join(vault_path, f'assets_{note_title}') 70 | if not os.path.exists(image_dir): 71 | os.makedirs(image_dir) 72 | target_image_path = os.path.join(image_dir, image_filename) 73 | 74 | # Save the uploaded file to the vault 75 | with open(target_image_path, 'wb') as out_file: 76 | out_file.write(uploaded_file.getbuffer()) 77 | 78 | # Create the Markdown for the image 79 | image_markdown = f"![{image_filename}]({image_filename})\n" 80 | 81 | # Append the content and image markdown to the note (create the note if it doesn't exist) 82 | with open(note_path, 'a') as note_file: 83 | if image_markdown: 84 | note_file.write(image_markdown + '\n') 85 | 86 | return note_path 87 | 88 | 89 | def append_to_obsidian_file(content: str, file_path: str): 90 | # Open the file in append mode 91 | with open(file_path, 'a') as f: 92 | f.write(content) --------------------------------------------------------------------------------