├── .env_example ├── .github └── FUNDING.yml ├── .gitignore ├── LICENSE ├── Procfile ├── README.md ├── app.json ├── demos ├── Screenshot_20231230-115838.png └── Screenshot_20231230-120220.png ├── main.py ├── railway.json ├── requirements.txt └── stickers.py /.env_example: -------------------------------------------------------------------------------- 1 | API_ID = 12345 2 | API_HASH = "your_api_hash_here" 3 | BOT_TOKEN = 'your_bot_token_here' 4 | GOOGLE_API_KEY = 'Your_api-key' -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: nuhmanpk 4 | patreon: # Replace with a single Patreon username 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: # Replace with a single Ko-fi username 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | otechie: # Replace with a single Otechie username 12 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry 13 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] 14 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | *.session 7 | *.session-journal 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 
26 | share/python-wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | MANIFEST 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .nox/ 46 | .coverage 47 | .coverage.* 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | *.cover 52 | *.py,cover 53 | .hypothesis/ 54 | .pytest_cache/ 55 | cover/ 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # Django stuff: 62 | *.log 63 | local_settings.py 64 | db.sqlite3 65 | db.sqlite3-journal 66 | 67 | # Flask stuff: 68 | instance/ 69 | .webassets-cache 70 | 71 | # Scrapy stuff: 72 | .scrapy 73 | 74 | # Sphinx documentation 75 | docs/_build/ 76 | 77 | # PyBuilder 78 | .pybuilder/ 79 | target/ 80 | 81 | # Jupyter Notebook 82 | .ipynb_checkpoints 83 | 84 | # IPython 85 | profile_default/ 86 | ipython_config.py 87 | 88 | # pyenv 89 | # For a library or package, you might want to ignore these files since the code is 90 | # intended to run in multiple environments; otherwise, check them in: 91 | # .python-version 92 | 93 | # pipenv 94 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 95 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 96 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 97 | # install all needed dependencies. 98 | #Pipfile.lock 99 | 100 | # poetry 101 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 102 | # This is especially recommended for binary packages to ensure reproducibility, and is more 103 | # commonly ignored for libraries. 
104 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 105 | #poetry.lock 106 | 107 | # pdm 108 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 109 | #pdm.lock 110 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 111 | # in version control. 112 | # https://pdm.fming.dev/#use-with-ide 113 | .pdm.toml 114 | 115 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 116 | __pypackages__/ 117 | 118 | # Celery stuff 119 | celerybeat-schedule 120 | celerybeat.pid 121 | 122 | # SageMath parsed files 123 | *.sage.py 124 | 125 | # Environments 126 | .env 127 | .venv 128 | env/ 129 | venv/ 130 | ENV/ 131 | env.bak/ 132 | venv.bak/ 133 | 134 | # Spyder project settings 135 | .spyderproject 136 | .spyproject 137 | 138 | # Rope project settings 139 | .ropeproject 140 | 141 | # mkdocs documentation 142 | /site 143 | 144 | # mypy 145 | .mypy_cache/ 146 | .dmypy.json 147 | dmypy.json 148 | 149 | # Pyre type checker 150 | .pyre/ 151 | 152 | # pytype static type analyzer 153 | .pytype/ 154 | 155 | # Cython debug symbols 156 | cython_debug/ 157 | 158 | # PyCharm 159 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 160 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 161 | # and can be added to the global gitignore or merged into this file. For a more nuclear 162 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
163 | #.idea/ 164 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Nuhman Pk 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Procfile: -------------------------------------------------------------------------------- 1 | web: python3 main.py 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # VisionScriptBot 2 | A telegram bot that uses Google's Gemini Pro Vision API , Take a demo [here](https://t.me/visionscriptbot). New Version support prompts along with Images, Add your prompt in Image caption before uploading the Image. 
3 | 4 | ### Gemini Vision Pro 5 | 6 | Gemini Pro Vision is a Gemini large language vision model that understands input from text and visual modalities (image and video) in addition to text to generate relevant text responses. 7 | 8 | Gemini Pro Vision is a foundation model that performs well at a variety of multimodal tasks such as visual understanding, classification, summarization, and creating content from image and video. It's adept at processing visual and text inputs such as photographs, documents, infographics, and screenshots. 9 | 10 | ## Gemini API 11 | VisionScriptBot uses Google's new [Gemini Pro Model](https://ai.google.dev/docs). 12 | 13 | [Gemini](https://deepmind.google/technologies/gemini/) is Google's latest family of [large language models](https://blog.google/technology/ai/google-gemini-ai/#performance). 14 | 15 | ### API KEY 16 | 17 | You need a Google API key 🔐 for Gemini to run this model. 18 | Get your API key from 19 | https://makersuite.google.com/app/apikey 20 | 21 | 22 | Google's Python SDK for the Gemini API is contained in the google-generativeai package. Install the dependency using pip: 23 | 24 | 25 | ```bash 26 | pip install -q -U google-generativeai 27 | ``` 28 | 29 | For the complete guide, refer to the [Python quickstart](https://ai.google.dev/tutorials/python_quickstart). 30 | 31 | ### Deploy 32 | 33 | Deployed on [Railway.app](https://railway.app?referralCode=O6FeyZ); do check out their free hosting plans [here](https://railway.app?referralCode=O6FeyZ). 34 | 35 | #### Use cases 36 | 37 | 1. Visual information seeking: Use external knowledge combined with information extracted from the input image or video to answer questions. 38 | 39 | 1. Object recognition: Answer questions related to fine-grained identification of the objects in images and videos. 40 | 41 | 1. Digital content understanding: Answer questions and extract information from visual content like infographics, charts, figures, tables, and web pages. 42 | 43 | 1. 
Structured content generation: Generate responses based on multimodal inputs in formats like HTML and JSON. 44 | 45 | 1. Captioning and description: Generate descriptions of images and videos with varying levels of details. 46 | 47 | 1. Reasoning: Compositionally infer new information without memorization or retrieval. 48 | 49 | 50 | ## Demo 51 | 52 | ![](https://github.com/nuhmanpk/VisionScriptBot/blob/main/demos/Screenshot_20231230-115838.png) 53 | 54 | ## Support 55 | 56 | If You find this project useful, Do support me [here](https://github.com/sponsors/nuhmanpk) 57 | 58 | -------------------------------------------------------------------------------- /app.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "VisionScriptBot", 3 | "env": { 4 | "API_ID": { 5 | "description": "your app id, take it from my.telegram.org", 6 | "value": "" 7 | }, 8 | "API_HASH": { 9 | "description": "your app Hash, take it from my.telegram.org", 10 | "value": "" 11 | }, 12 | "BOT_TOKEN": { 13 | "description": "Bot Token From @botFather", 14 | "value": "" 15 | } 16 | }, 17 | "buildpacks": [ 18 | { 19 | "url": "heroku/python" 20 | } 21 | ] 22 | } 23 | -------------------------------------------------------------------------------- /demos/Screenshot_20231230-115838.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nuhmanpk/VisionScriptBot/e68e56b050dc69d99f30cc670ece3744dc4ffedf/demos/Screenshot_20231230-115838.png -------------------------------------------------------------------------------- /demos/Screenshot_20231230-120220.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nuhmanpk/VisionScriptBot/e68e56b050dc69d99f30cc670ece3744dc4ffedf/demos/Screenshot_20231230-120220.png -------------------------------------------------------------------------------- /main.py: 
# --------------------------------------------------------------------------------
# /usr/bin/nuhman/bughunter0
"""VisionScriptBot: a Telegram bot that sends photos to Google Gemini.

A photo received in a private chat is downloaded, passed to the Gemini model
(together with the photo caption as a prompt, when one is present) and the
generated text is posted back to the user as one or more replies.
"""

import asyncio
import os
import random
import time

from dotenv import load_dotenv
import google.generativeai as genai
import PIL.Image
from pyrogram import filters, Client
from pyrogram.types import InlineKeyboardMarkup, InlineKeyboardButton, Message

from stickers import stickers

load_dotenv()

API_HASH = os.getenv("API_HASH")
API_ID = os.getenv("API_ID")
BOT_TOKEN = os.getenv("BOT_TOKEN")
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")

# Fail early with a readable message instead of an opaque
# ``TypeError: int() argument must be ... not 'NoneType'`` further down.
_missing_settings = [
    name
    for name, value in (
        ("API_HASH", API_HASH),
        ("API_ID", API_ID),
        ("BOT_TOKEN", BOT_TOKEN),
        ("GOOGLE_API_KEY", GOOGLE_API_KEY),
    )
    if not value
]
if _missing_settings:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(_missing_settings)
    )

# Name of the Gemini model used for every request.
MODEL_NAME = "gemini-1.5-pro"
# Maximum number of characters sent in a single reply; longer text is split.
TELEGRAM_MESSAGE_LIMIT = 4096
# Pause between consecutive replies, in seconds.
REPLY_DELAY_SECONDS = 2

GITHUB_BUTTON = InlineKeyboardMarkup(
    [
        [
            InlineKeyboardButton(
                "Source Code", url="https://github.com/nuhmanpk/VisionScriptBot"
            )
        ]
    ]
)

app = Client(
    "VisionScriptBot", api_hash=API_HASH, api_id=int(API_ID), bot_token=BOT_TOKEN
)

genai.configure(api_key=GOOGLE_API_KEY)


def _split_text(text, limit=TELEGRAM_MESSAGE_LIMIT):
    """Return *text* cut into consecutive pieces of at most *limit* characters.

    An empty string yields an empty list.
    """
    return [text[start:start + limit] for start in range(0, len(text), limit)]


@app.on_message(filters.command("start") & filters.private)
async def start(_, message: Message):
    """Reply to /start with a welcome message addressed to the user."""
    chat = message.chat
    # Users without a public username would otherwise be greeted as "@None".
    greeting_name = f"@{chat.username}" if chat.username else (chat.first_name or "there")
    welcome_message = (
        f"👋 Hey {greeting_name}!\n\n"
        "I'm here to help. Just send me an image, and I'll do the rest.\n\n"
        "Feel free to explore and use my features. If you have any questions or need assistance, "
        "you can use the /help command.\n\n"
        "🤖 Don't forget to join @BughunterBots for more awesome bots like me!\n"
    )
    await message.reply(welcome_message, quote=True)


@app.on_message(filters.command("help") & filters.private)
async def help_command(_, message: Message):
    """Reply to /help with short usage instructions."""
    help_message = (
        "🤖 **How to use the Transcription Bot**\n\n"
        "1. **Send an Image:** Simply send me an image containing text that you want transcribed.\nGot any question regarding the image? add it in the image caption before uploading.\n"
        "2. **Wait for Transcription:** I'll process the image and provide you with the transcribed text.\n\n"
        "For updates and more bots, join @BughunterBots 🚀\n"
    )
    await message.reply(help_message, quote=True)


@app.on_message(filters.photo & filters.private)
async def vision(bot, message: Message):
    """Send the received photo (and optional caption) to Gemini and reply.

    A random sticker and a status message are shown while the request is in
    progress and deleted once the model has answered. The downloaded photo is
    always removed from disk, whether or not the request succeeded. Any
    exception is reported to the user and then re-raised.
    """
    file_path = None
    try:
        sticker = await message.reply_sticker(random.choice(stickers))
        txt = await message.reply(f"Loading {MODEL_NAME} ...")
        model = genai.GenerativeModel(MODEL_NAME)
        await txt.edit("Downloading Photo ....")
        file_path = await message.download()
        caption = message.caption
        await txt.edit("Shhh 🤫 , Gemini Vision Pro is At Work ⚠️.\n Pls Wait..")
        # The context manager closes the image file handle so the file can be
        # removed afterwards; the async API keeps the event loop free while
        # the model is working.
        with PIL.Image.open(file_path) as img:
            prompt = [caption, img] if caption else img
            response = await model.generate_content_async(prompt)
        await txt.edit('@VisionScriptBot is cooking...')
        await sticker.delete()
        await txt.delete()

        try:
            parts = response.parts
        except ValueError:
            # The SDK's quick accessors raise ValueError when the response
            # carries no usable candidate (e.g. a blocked prompt).
            parts = []

        # Skip empty parts and split long ones: an empty or over-long text
        # makes message.reply fail.
        chunks = [
            chunk
            for part in parts
            if part.text
            for chunk in _split_text(part.text)
        ]
        if not chunks:
            await message.reply(
                "Couldn't figure out what's in the Image. Contact @bughunter0 for help."
            )
            return

        for index, chunk in enumerate(chunks):
            if index:
                # Non-blocking pause between consecutive replies.
                await asyncio.sleep(REPLY_DELAY_SECONDS)
            await message.reply(chunk, reply_markup=GITHUB_BUTTON)
    except Exception:
        await message.reply("Something Bad occurred, Contact @bughunter0")
        raise
    finally:
        # Remove the downloaded photo on failures as well as on success.
        if file_path and os.path.exists(file_path):
            os.remove(file_path)


@app.on_message(filters.document & filters.private)
async def document(bot, message: Message):
    """Tell the user that files sent as documents are not supported."""
    await message.reply(
        "Documents are not supported, Please send the File as Image !!!\n\n @BughunterBots"
    )


@app.on_message(filters.command("source") & filters.private)
async def source(bot, message: Message):
    """Reply to /source with a button linking to the project repository."""
    msg = (
        "Here is the source code for the bot 🚀\n\n"
        "Follow me for updates , and add Your star if you find this helpful 🌟\n\n"
        "Thank you for your support! 👏\n\n"
        "Happy Coding! 🚀"
    )
    await message.reply(msg, reply_markup=GITHUB_BUTTON)


if __name__ == "__main__":
    print("Bot Started...")
    app.run()

# --------------------------------------------------------------------------------
# /railway.json:
# --------------------------------------------------------------------------------
# {
#   "build": {
#     "builder": "HEROKU"
#   },
#   "deploy": {
#     "numReplicas": 1,
#     "sleepApplication": false,
#     "restartPolicyType": "ON_FAILURE",
#     "restartPolicyMaxRetries": 10
#   }
# }
# --------------------------------------------------------------------------------
# /requirements.txt:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/nuhmanpk/VisionScriptBot/e68e56b050dc69d99f30cc670ece3744dc4ffedf/requirements.txt
# --------------------------------------------------------------------------------
# /stickers.py:
# --------------------------------------------------------------------------------
# Telegram sticker file IDs; one is picked at random while a photo is processed.
# Every entry needs its own trailing comma: adjacent string literals without a
# comma are silently concatenated into a single (invalid) ID.
stickers = [
    "CAACAgEAAxkBAAEYAqxlkq28KegzHLH0-kZ55e_6FfngeAACRwADnjOcH98isYD5RJTwNAQ",
    "CAACAgEAAxkBAAEYAr5lkq67vFPLasP0sf4oIoIV9HotEAACpwIAAnJoIUQEJZjFh8eLIzQE",
    "CAACAgEAAxkBAAEYArhlkq5wWchFDbNKZo0UwWb8fdvnTAACLQIAAqcjIUQ9QDDJ7YO0tjQE",
    "CAACAgEAAxkBAAEYArplkq6FqCyo7JsD8zONqUfvDslwqwACeQEAAsi3GEQBsI4FdYg9jDQE",
    "CAACAgEAAxkBAAEYArxlkq6pXH-2l-3kv271dBPnM7ZCowACXgIAAl3yGUSGa-Q11eKzuTQE",
    "CAACAgEAAxkBAAEYAsBlkq7Ju3Fj3h9kTOxNGBP9ZixdAgACVgMAAr-QIUR8-pez2EZI2zQE",
    "CAACAgEAAxkBAAEYAshlkq9Xr90XEAVGsRymtQ7I82Ul3QACSQMAAs7EoEafktk_u7gHIzQE",
    "CAACAgEAAxkBAAEYAsJlkq77-xcdFMmL983Sfza8mAfIWwACwQEAAm3SUEVB2GFv5BtTzTQE",
    "CAACAgEAAxkBAAEYAsplkq-IJjDNCIsad6aneyYasqnzvAACBwIAAknHoUZrTecesaz05zQE",
    "CAACAgEAAxkBAAEYAsRlkq8vtbtWWqhRNpDE6IESlP_UQwACTAMAAoIzWUTPkzhJAAElokM0BA",
    "CAACAgEAAxkBAAEYAsZlkq84IeqLx8j56othnY-7q14WCgACOwMAAoePWUSmrsnqDArw3jQE",
    "CAACAgIAAxkBAAEYAq1lkq28DAo5ep1uoVhLYhm4Hm674gACgBgAAsC2UEmimzNNrlDPPDQE",
    "CAACAgIAAxkBAAEYAq5lkq2811GiH2MxZXjHqhlYJwY_aQACoBcAAt-0IEkggKwiuyDGyTQE",
    "CAACAgIAAxkBAAEYAq9lkq28kgu8D502UUezvqKFO2CaTAACqhYAApmkEUqpUt4Hkn-q5TQE",
    "CAACAgIAAxkBAAEYAqhlkq28KGjH-KawLcVvdLUfqcISWQACzQEAAhZCawrL2Zt7FoIvuDQE",
    "CAACAgIAAxkBAAEYAqllkq28y8fyXKsBf-Pc9AN9-1dIdQAC1BEAA8CgSXknAeKPK_QMNAQ",
    "CAACAgUAAxkBAAEYAqplkq28CIZj0-cZkEhRrtj4KwMHewADAwACKSRJVGYT9GcdigjdNAQ",
    "CAACAgUAAxkBAAEYAqtlkq28rAZzHDw2lQ58NnQIII_m1QACKAQAApv4OFZA66LtASf7izQE",
    "CAACAgEAAxkBAAEYAqRlkq0-L_85eTtO6GdIFmZQ1--GTQACSwIAAsF-IUTpsXse4dUMzDQE",
]
# --------------------------------------------------------------------------------