├── .env.example
├── .gitignore
├── README.md
├── app.py
├── llm.py
├── pc_command.py
├── requirements.txt
├── static
│   ├── img
│   │   └── microphone.png
│   ├── recorder.css
│   └── recorder.js
├── templates
│   └── recorder.html
├── transcriber.py
├── tts.py
└── weather.py

/.env.example:
--------------------------------------------------------------------------------
OPENAI_API_KEY=
ELEVENLABS_API_KEY=
WEATHER_API_KEY=
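These keys are read at startup with python-dotenv (app.py calls load_dotenv()). If you want the app to fail fast when one of them is missing, a minimal check could look like the sketch below; it is not part of the repository, and where you call it from is up to you.

```python
import os
from dotenv import load_dotenv

load_dotenv()  # reads the .env file you created from .env.example

REQUIRED_KEYS = ("OPENAI_API_KEY", "ELEVENLABS_API_KEY", "WEATHER_API_KEY")
missing = [key for key in REQUIRED_KEYS if not os.getenv(key)]
if missing:
    raise RuntimeError(f"Missing keys in .env: {', '.join(missing)}")
```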
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
*.mp3

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------

# Virtual Assistant
This repository contains the code for the Virtual Assistant video on the Ringa Tech channel:
https://youtu.be/-0tIy8wWtzE

## Setup
To run the project you need to:
- Download the repository
- Optional: create a virtual environment
- Install the dependencies by running
  - ``` pip install -r requirements.txt ```
- Create a file named ```.env```
- Put your keys in that file. For the project exactly as it is in the video (and in this repository) I am using:
  - ```OPENAI_API_KEY=XXXXXX```
  - ```ELEVENLABS_API_KEY=XXXXXX```
  - ```WEATHER_API_KEY=XXXXXX```

## Tweaks
There are a few things in the project you may want to change, for example:

- In the LLM class you can change the prompt so the assistant is not "foul-mouthed". It is used in 2 places in that file (see the sketch right after this list).
- The PcCommand class opens Chrome by looking for it at a fixed Windows path. You can modify it so it finds the executable on Mac / Linux.
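For the first tweak, one low-effort option is to keep the persona string in a single module-level constant and build the message list from it in both `ChatCompletion.create` calls in `llm.py`. A rough sketch follows; the names `SYSTEM_PROMPT` and `base_messages` are only suggestions and do not exist in the repo.

```python
# Sketch: keep the persona in one place so both calls in llm.py stay in sync.
SYSTEM_PROMPT = "Eres un asistente amable y conciso"  # edit the personality here

def base_messages(text):
    # Both process_functions() and process_response() can start from this list.
    return [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": text},
    ]
```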
## Running
- This project uses Flask. You can start the server in debug mode, on the default port 5000, with the command
  - ```flask --app app run --debug```
- In your browser go to http://localhost:5000
- Click to start recording (it will ask for permission). Click again to stop recording
- Wait and watch it take over the world


## Problems?

I only tested this on my own machine, so if you run into problems, open an issue here on GitHub with as much detail as you can (Python version, package versions, the full error message, etc.).

If you are a ninja and fix it yourself, open a Pull Request!

## Licenses
- Microphone image by Freepik
--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
import os
import openai
from dotenv import load_dotenv
from flask import Flask, render_template, request
import json
from transcriber import Transcriber
from llm import LLM
from weather import Weather
from tts import TTS
from pc_command import PcCommand

# Load the keys from the .env file
load_dotenv()
openai.api_key = os.getenv('OPENAI_API_KEY')
elevenlabs_key = os.getenv('ELEVENLABS_API_KEY')

app = Flask(__name__)

@app.route("/")
def index():
    return render_template("recorder.html")

@app.route("/audio", methods=["POST"])
def audio():
    # Get the recorded audio and transcribe it
    audio = request.files.get("audio")
    text = Transcriber().transcribe(audio)

    # Use the LLM to see whether a function should be called
    llm = LLM()
    function_name, args, message = llm.process_functions(text)
    if function_name is not None:
        # The model wants to call one of the functions we defined
        if function_name == "get_weather":
            # Call the weather function
            function_response = Weather().get(args["ubicacion"])
            function_response = json.dumps(function_response)
            print(f"Respuesta de la funcion: {function_response}")

            final_response = llm.process_response(text, message, function_name, function_response)
            tts_file = TTS().process(final_response)
            return {"result": "ok", "text": final_response, "file": tts_file}

        elif function_name == "send_email":
            # Call the function that sends an email
            final_response = "Tu que estas leyendo el codigo, implementame y envia correos muahaha"
            tts_file = TTS().process(final_response)
            return {"result": "ok", "text": final_response, "file": tts_file}

        elif function_name == "open_chrome":
            PcCommand().open_chrome(args["website"])
            final_response = "Listo, ya abrí chrome en el sitio " + args["website"]
            tts_file = TTS().process(final_response)
            return {"result": "ok", "text": final_response, "file": tts_file}

        elif function_name == "dominate_human_race":
            final_response = "No te creas. Suscríbete al canal!"
            tts_file = TTS().process(final_response)
            return {"result": "ok", "text": final_response, "file": tts_file}
    else:
        final_response = "No tengo idea de lo que estás hablando, Ringa Tech"
        tts_file = TTS().process(final_response)
        return {"result": "ok", "text": final_response, "file": tts_file}
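The send_email branch above is intentionally left unimplemented. If you want to wire it up, one possible shape, using only the standard library, is the sketch below. The SMTP host, port and credentials are read from environment variables you would have to add yourself; none of those names exist in this repository, and the LLM schema marks recipient, subject and body as optional, so validate them before sending.

```python
import os
import smtplib
from email.message import EmailMessage

def send_email(recipient, subject, body):
    # All of these settings are assumptions; add them to your .env if you use this.
    host = os.getenv("SMTP_HOST", "smtp.gmail.com")
    port = int(os.getenv("SMTP_PORT", "587"))
    user = os.getenv("SMTP_USER")
    password = os.getenv("SMTP_PASSWORD")

    msg = EmailMessage()
    msg["From"] = user
    msg["To"] = recipient
    msg["Subject"] = subject
    msg.set_content(body)

    # STARTTLS + login, then send; adjust for your provider as needed.
    with smtplib.SMTP(host, port) as server:
        server.starttls()
        server.login(user, password)
        server.send_message(msg)
```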
--------------------------------------------------------------------------------
/llm.py:
--------------------------------------------------------------------------------
import openai
import json

# Class for using any LLM to process a piece of text
# and return a function to call, along with its parameters.
# I use the 0613 model, but you can use a bit of
# prompt engineering if you want to use a different model.
class LLM():
    def __init__(self):
        pass

    def process_functions(self, text):

        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo-0613",
            messages=[
                # If you don't want it to talk rudely to you, change its description here
                {"role": "system", "content": "Eres un asistente malhablado"},
                {"role": "user", "content": text},
            ], functions=[
                {
                    "name": "get_weather",
                    "description": "Obtener el clima actual",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "ubicacion": {
                                "type": "string",
                                "description": "La ubicación, debe ser una ciudad",
                            }
                        },
                        "required": ["ubicacion"],
                    },
                },
                {
                    "name": "send_email",
                    "description": "Enviar un correo",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "recipient": {
                                "type": "string",
                                "description": "La dirección de correo que recibirá el correo electrónico",
                            },
                            "subject": {
                                "type": "string",
                                "description": "El asunto del correo",
                            },
                            "body": {
                                "type": "string",
                                "description": "El texto del cuerpo del correo",
                            }
                        },
                        "required": [],
                    },
                },
                {
                    "name": "open_chrome",
                    "description": "Abrir el explorador Chrome en un sitio específico",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "website": {
                                "type": "string",
                                "description": "El sitio al cual se desea ir"
                            }
                        }
                    }
                },
                {
                    "name": "dominate_human_race",
                    "description": "Dominar a la raza humana",
                    "parameters": {
                        "type": "object",
                        "properties": {
                        }
                    },
                }
            ],
            function_call="auto",
        )

        message = response["choices"][0]["message"]

        # Does our friend GPT want to call one of the functions?
        if message.get("function_call"):
            # Yep
            function_name = message["function_call"]["name"]  # Which function?
            args = message.to_dict()['function_call']['arguments']  # With which arguments?
            print("Funcion a llamar: " + function_name)
            args = json.loads(args)
            return function_name, args, message

        return None, None, message

    # Once the function has been called (e.g. getting the weather, turning on a light, etc.)
    # we can call this method with the original message, the function that was called and its
    # response, to get an answer in natural language (useful when the response was JSON,
    # for example).
    def process_response(self, text, message, function_name, function_response):
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo-0613",
            messages=[
                # Here you can also change how it behaves
                {"role": "system", "content": "Eres un asistente malhablado"},
                {"role": "user", "content": text},
                message,
                {
                    "role": "function",
                    "name": function_name,
                    "content": function_response,
                },
            ],
        )
        return response["choices"][0]["message"]["content"]
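llm.py uses the legacy `openai.ChatCompletion` interface with `functions=` / `function_call=`, which only works on openai-python versions before 1.0. If you ever upgrade the package, the equivalent call looks roughly like the sketch below (functions become `tools`, and the result arrives as `tool_calls`); treat it as a starting point, not a drop-in replacement for the class.

```python
import json
from openai import OpenAI  # openai >= 1.0 style client

client = OpenAI()  # reads OPENAI_API_KEY from the environment

def process_functions(text, tools):
    # "tools" wraps each old-style function schema as {"type": "function", "function": {...}}
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "Eres un asistente malhablado"},
            {"role": "user", "content": text},
        ],
        tools=tools,
        tool_choice="auto",
    )
    message = response.choices[0].message
    if message.tool_calls:
        call = message.tool_calls[0]
        return call.function.name, json.loads(call.function.arguments), message
    return None, None, message
```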
--------------------------------------------------------------------------------
/pc_command.py:
--------------------------------------------------------------------------------
from subprocess import call

# Class for running commands on the PC.
# For now it is hard-coded to work on Windows, hohoh
class PcCommand():
    def __init__(self):
        pass

    def open_chrome(self, website):
        website = "" if website is None else website
        # Works on Windows; if you want it on another OS, modify it!! :D
        # Passing a list keeps the space in "Program Files" from breaking the call.
        call(["C:/Program Files/Google/Chrome/Application/chrome.exe", website])
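If you want open_chrome to work on macOS or Linux as well, one rough approach is to pick the launcher by platform, as in the sketch below. The paths and binary names are the usual defaults and are assumptions; adjust them for your machine.

```python
import platform
import shutil
from subprocess import call

def open_chrome(website):
    website = website or ""
    system = platform.system()
    if system == "Windows":
        # Default install location; change it if Chrome lives somewhere else.
        call(["C:/Program Files/Google/Chrome/Application/chrome.exe", website])
    elif system == "Darwin":  # macOS
        cmd = ["open", "-a", "Google Chrome"]
        call(cmd + [website] if website else cmd)
    else:  # Linux: use whichever Chrome/Chromium binary is on PATH
        binary = (shutil.which("google-chrome")
                  or shutil.which("chromium-browser")
                  or shutil.which("chromium"))
        if binary:
            call([binary, website] if website else [binary])
```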
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ringa-tech/asistente-virtual/ace077e367470f032d79e9a595468384cd619795/requirements.txt
--------------------------------------------------------------------------------
/static/img/microphone.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ringa-tech/asistente-virtual/ace077e367470f032d79e9a595468384cd619795/static/img/microphone.png
--------------------------------------------------------------------------------
/static/recorder.css:
--------------------------------------------------------------------------------
.btn-record {
    border: none;
    padding: 25px;
    border-radius: 50%;
    width: 100px;
    height: 100px;
}

.btn-record-green {
    background-color: #19ceae;
}

.btn-record-red {
    background-color: #dc3545;
}

.btn-record img {
    width: 100%;
    height: 100%;
    object-fit: contain;
}

.record-spinner {
    color: white;
    width: 30px;
    height: 30px;
}

.mic-img {
    width: 30px;
}
--------------------------------------------------------------------------------
/static/recorder.js:
--------------------------------------------------------------------------------
let blobs = [];
let stream;
let rec;
let recordUrl;
let audioResponseHandler;

// I only store the URL to call (e.g. /audio) and the 'handler'
// or 'callback' to invoke when the recording finishes
function recorder(url, handler) {
    recordUrl = url;
    if (typeof handler !== "undefined") {
        audioResponseHandler = handler;
    }
}

/**
 * Since this is a small project I use document.getElementById like a maniac.
 * If you don't like it, you can change it ;)
 */
async function record() {
    try {
        document.getElementById("text").innerHTML = "Grabando...";
        document.getElementById("record").style.display="none";
        document.getElementById("stop").style.display="";
        document.getElementById("record-stop-label").style.display="block"
        document.getElementById("record-stop-loading").style.display="none"
        document.getElementById("stop").disabled=false

        blobs = [];

        // Record the audio, blah blah
        stream = await navigator.mediaDevices.getUserMedia({audio:true, video:false})
        rec = new MediaRecorder(stream);
        rec.ondataavailable = e => {
            if (e.data) {
                blobs.push(e.data);
            }
        }

        rec.onstop = doPreview;

        rec.start();
    } catch (e) {
        alert("No fue posible iniciar el grabador de audio! Favor de verificar que se tenga el permiso adecuado, estar en HTTPS, etc...");
    }
}

function doPreview() {
    if (!blobs.length) {
        console.log("No hay blobios!");
    } else {
        console.log("Tenemos blobios!");
        const blob = new Blob(blobs);

        // Use fetch to send the recorded audio over to Python land
        var fd = new FormData();
        fd.append("audio", blob, "audio");

        fetch(recordUrl, {
            method: "POST",
            body: fd,
        })
        .then((response) => response.json())
        .then(audioResponseHandler)
        .catch(err => {
            // You can do something smarter here
            console.log("Oops: Ocurrió un error", err);
        });
    }
}

function stop() {
    document.getElementById("record-stop-label").style.display="none";
    document.getElementById("record-stop-loading").style.display="block";
    document.getElementById("stop").disabled=true;

    rec.stop();
}

// Call the handler if there is one
function handleAudioResponse(response){
    if (!response || response == null) {
        //TODO subscribe you thief
        console.log("No response");
        return;
    }

    document.getElementById("record").style.display="";
    document.getElementById("stop").style.display="none";

    if (audioResponseHandler != null) {
        audioResponseHandler(response);
    }
}
--------------------------------------------------------------------------------
/templates/recorder.html:
--------------------------------------------------------------------------------
(The markup of recorder.html was stripped during this export and only blank numbered lines remained, so it is not reproduced here; see the repository for the actual template.)