├── screenshot.png ├── add-to-init.el ├── jarvis.py ├── jarvis-chatgpt-api.py └── README.md /screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jackdoe/emacs-chatgpt-jarvis/HEAD/screenshot.png -------------------------------------------------------------------------------- /add-to-init.el: -------------------------------------------------------------------------------- 1 | (write-region "" nil "/tmp/jarvis-chatgpt.txt") 2 | 3 | (require 'filenotify) 4 | (generate-new-buffer "CHATGPT") 5 | 6 | (defun my-jarvis-callback (event) 7 | (with-current-buffer "CHATGPT" 8 | (erase-buffer) 9 | (insert-file-contents "/tmp/jarvis-chatgpt.txt" nil 0 5000) 10 | (goto-char (point-max)))) 11 | 12 | (file-notify-add-watch 13 | "/tmp/jarvis-chatgpt.txt" '(change) 'my-jarvis-callback) 14 | 15 | (defun send-selection-to-jarvis () 16 | (interactive) 17 | (if (use-region-p) 18 | (write-region (region-beginning) (region-end) "/tmp/jarvis-chatgpt-input.txt" 0))) 19 | (global-set-key (kbd "") 'send-selection-to-jarvis) 20 | -------------------------------------------------------------------------------- /jarvis.py: -------------------------------------------------------------------------------- 1 | from pyaudio import PyAudio, paInt16 2 | import wave, whisper, os, time 3 | from pynput import keyboard 4 | from chatgpt_wrapper import ChatGPT 5 | 6 | LISTEN = False 7 | OUTPUT = "/tmp/jarvis-chatgpt.txt" 8 | RECORDING_FILE = "/tmp/jarvis-chatgpt.wav" 9 | EXTRA_INPUT = "/tmp/jarvis-chatgpt-input.txt" 10 | 11 | def on_press(key): 12 | global LISTEN 13 | if key == keyboard.Key.f12: 14 | LISTEN = True 15 | 16 | def on_release(key): 17 | global LISTEN 18 | if key == keyboard.Key.f12: 19 | LISTEN = False 20 | 21 | def out(t): 22 | with open(OUTPUT, "w") as f: 23 | f.write(t) 24 | 25 | def read_extra_file(): 26 | data = '' 27 | try: 28 | with open(EXTRA_INPUT, "r") as f: 29 | data = f.read() 30 | except: 31 | pass 32 | 
finally: 33 | try: 34 | os.remove(EXTRA_INPUT) 35 | except: 36 | pass 37 | return data 38 | 39 | def microphone(name, seconds): 40 | with wave.open(name, 'wb') as wf: 41 | p = PyAudio() 42 | wf.setnchannels(1) 43 | sample = p.get_sample_size(paInt16) 44 | wf.setsampwidth(sample) 45 | wf.setframerate(44100) 46 | 47 | stream = p.open(format=paInt16,channels=1,rate=44100,input=True) 48 | 49 | chunks = 44100//1024*seconds 50 | for _ in range(0, chunks): 51 | wf.writeframes(stream.read(1024)) 52 | if not LISTEN: 53 | break 54 | 55 | stream.close() 56 | p.terminate() 57 | 58 | listener = keyboard.Listener(on_press=on_press,on_release=on_release) 59 | listener.start() 60 | 61 | bot = ChatGPT() 62 | model = whisper.load_model("medium.en") 63 | out("waiting, pres f12 to ask a question, region selection will be appended...") 64 | print('...') 65 | while True: 66 | if LISTEN: 67 | question = '' 68 | try: 69 | out("listening...") 70 | t0 = time.time() 71 | microphone(RECORDING_FILE, 60) 72 | if time.time() - t0 > 1: 73 | out("transcribing...") 74 | r = model.transcribe(RECORDING_FILE) 75 | question = r["text"] 76 | else: 77 | question = '' 78 | finally: 79 | try: 80 | os.remove(RECORDING_FILE) 81 | except: 82 | pass 83 | extra = read_extra_file() 84 | out(f"decoded: {question}\n{extra}\nasking chatgpt...") 85 | 86 | bot.refresh_session() 87 | stream = bot.ask_stream(f"""You are the best software developer in the world, most experienced in go and python, answer the following question: 88 | 89 | {question} 90 | {extra} 91 | """) 92 | 93 | response = f"# QUESTION:\n{question}\n{extra}\n# CHARTPT START\n" 94 | for chunk in stream: 95 | response += chunk 96 | out(response) 97 | response += '\n# CHATGPT END\n' 98 | out(response) 99 | 100 | time.sleep(0.01) 101 | -------------------------------------------------------------------------------- /jarvis-chatgpt-api.py: -------------------------------------------------------------------------------- 1 | import openai 2 | from pyaudio 
import PyAudio, paInt16 3 | import wave, whisper, os, time 4 | from pynput import keyboard 5 | from threading import Thread,Event 6 | from itertools import cycle 7 | import traceback 8 | 9 | LISTEN = False 10 | OUTPUT = "/tmp/jarvis-chatgpt.txt" 11 | RECORDING_FILE = "/tmp/jarvis-chatgpt.wav" 12 | EXTRA_INPUT = "/tmp/jarvis-chatgpt-input.txt" 13 | DONE = Event() 14 | openai.api_key = os.getenv("OPENAI_API_KEY") 15 | 16 | def on_press(key): 17 | global LISTEN 18 | if key == keyboard.Key.f12: 19 | LISTEN = True 20 | 21 | def on_release(key): 22 | global LISTEN 23 | if key == keyboard.Key.f12: 24 | LISTEN = False 25 | 26 | def out(t): 27 | with open(OUTPUT, "w") as f: 28 | f.write(t) 29 | 30 | def read_extra_file(): 31 | data = '' 32 | try: 33 | with open(EXTRA_INPUT, "r") as f: 34 | data = f.read() 35 | except: 36 | pass 37 | finally: 38 | try: 39 | os.remove(EXTRA_INPUT) 40 | except: 41 | pass 42 | return data 43 | 44 | def microphone(name, seconds): 45 | with wave.open(name, 'wb') as wf: 46 | p = PyAudio() 47 | wf.setnchannels(1) 48 | sample = p.get_sample_size(paInt16) 49 | wf.setsampwidth(sample) 50 | wf.setframerate(44100) 51 | 52 | stream = p.open(format=paInt16,channels=1,rate=44100,input=True) 53 | 54 | chunks = 44100//1024*seconds 55 | for _ in range(0, chunks): 56 | wf.writeframes(stream.read(1024)) 57 | if not LISTEN: 58 | break 59 | 60 | stream.close() 61 | p.terminate() 62 | 63 | def waiting(question, extra): 64 | spinner = cycle(list('|/-\\')) 65 | while not DONE.is_set(): 66 | out(f"decoded: {question}\n{extra}\nasking chatgpt... 
" + next(spinner)) 67 | DONE.wait(timeout=0.1) 68 | 69 | listener = keyboard.Listener(on_press=on_press,on_release=on_release) 70 | listener.start() 71 | 72 | 73 | model = whisper.load_model("medium.en") 74 | out("waiting, pres f12 to ask a question, region selection will be appended...") 75 | print('...') 76 | 77 | while True: 78 | if LISTEN: 79 | question = '' 80 | try: 81 | out("listening...") 82 | t0 = time.time() 83 | microphone(RECORDING_FILE, 60) 84 | if time.time() - t0 > 1: 85 | out("transcribing...") 86 | r = model.transcribe(RECORDING_FILE) 87 | question = r["text"] 88 | else: 89 | question = '' 90 | finally: 91 | try: 92 | os.remove(RECORDING_FILE) 93 | except: 94 | pass 95 | 96 | extra = read_extra_file() 97 | 98 | DONE.clear() 99 | t0 = Thread(target=waiting, args=(question, extra,)) 100 | t0.start() 101 | 102 | response = f"# QUESTION:\n{question}\n{extra}\n# CHATGPT START\n" 103 | 104 | try: 105 | chatgpt_request = f"{question}\n{extra}" 106 | completion = openai.ChatCompletion.create( 107 | model="gpt-3.5-turbo", 108 | messages=[ 109 | {"role": "system", "content": "You are the best software developer in the world, most experienced in go and python, answer the following question:"}, 110 | {"role": "user", "content": chatgpt_request} 111 | ] 112 | ) 113 | 114 | response += completion.choices[0].message.content 115 | response += '\n# CHATGPT END\n' 116 | 117 | except Exception as e: 118 | exception_stack = traceback.format_exc() 119 | response = f"Error: {str(e)}\n\n{exception_stack}" 120 | finally: 121 | DONE.set() 122 | t0.join() 123 | out(response) 124 | 125 | time.sleep(0.01) 126 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PROOF OF CONCEPT 2 | 3 | > I always dreamed of not leaving emacs, with eww and chatgpt I am closer than ever. 
4 | 5 | This is a proof of concept program that allows you to use voice to interact with chatgpt and see the result in emacs. 6 | 7 | To transcribe your speech using OpenAI's Whisper (it uses it locally, your voice is not sent anywhere), press the F12 key to start recording. Whisper will continue to transcribe your speech until you release the F12 key. After recording, ask ChatGPT for assistance and print the output in the buffer. 8 | 9 | __It stores the recording (up to 60 seconds) in /tmp/jarvis-chatgpt.wav__ 10 | 11 | Youtube videos showing how it feels: 12 | 13 | * using the chatgpt api (instead of the headless browser wrapper) to explain and refactor code: 14 | 15 | [![example explain and refactor code](https://img.youtube.com/vi/KX0ZaXcgXNc/0.jpg)](https://www.youtube.com/watch?v=KX0ZaXcgXNc "example explain and refactor") 16 | 17 | * asking a question: 18 | 19 | [![example question](https://img.youtube.com/vi/P-5RBdM9X-8/0.jpg)](https://www.youtube.com/watch?v=P-5RBdM9X-8 "example question") 20 | 21 | * refactor code 22 | 23 | [![example refactor](https://img.youtube.com/vi/uWJ8-JU0aXY/0.jpg)](https://www.youtube.com/watch?v=uWJ8-JU0aXY "example refactor") 24 | 25 | * rewrite text 26 | 27 | [![example rewrite](https://img.youtube.com/vi/4Jyhs6SfFl0/0.jpg)](https://www.youtube.com/watch?v=4Jyhs6SfFl0 "example rewrite") 28 | 29 | # Install 30 | 31 | The whisper/pyaudio/chatgpt-wrapper are a bit more involved than pip install, whisper needs ffmpeg for example, so it's best to follow the instructions on their homepages: 32 | 33 | * pip install pynput 34 | * install https://github.com/openai/whisper 35 | * install https://pypi.org/project/PyAudio/ (on Windows it's just pip install pyaudio) 36 | * install https://github.com/mmabrouk/chatgpt-wrapper or `pip install openai` if you have an api key from openai: https://platform.openai.com/account/api-keys 37 | 38 | Edit jarvis.py if you want to use another key 39 | 40 | # Running and Using jarvis 41 | 42 | * If you have 
an API key, export it as OPENAI_API_KEY and use `jarvis-chatgpt-api.py` instead of `jarvis.py` 43 | * Run `python jarvis.py` in your terminal. Note that the first time you run it, the `medium.en` model, which is 1.4GB in size, will be downloaded. It may take up to a minute to load the model. 44 | * Open Emacs and navigate to the CHATGPT buffer. 45 | * Press the F12 key to ask a question. If you have a region of text selected, it will be saved to `/tmp/jarvis-chatgpt-input.txt` and appended to your question. For example, if you want to ask Jarvis to "refactor this code", select the code and then press F12. 46 | * If you press F12 for less than a second it will just send the selected region to ChatGPT. 47 | 48 | Add this to your init.el in order to keep watching the jarvis-chatgpt.txt file: 49 | 50 | ``` 51 | (write-region "" nil "/tmp/jarvis-chatgpt.txt") 52 | 53 | (require 'filenotify) 54 | (generate-new-buffer "CHATGPT") 55 | 56 | (defun my-jarvis-callback (event) 57 | (with-current-buffer "CHATGPT" 58 | (erase-buffer) 59 | (insert-file-contents "/tmp/jarvis-chatgpt.txt" nil 0 5000) 60 | (goto-char (point-max)))) 61 | 62 | (file-notify-add-watch 63 | "/tmp/jarvis-chatgpt.txt" '(change) 'my-jarvis-callback) 64 | 65 | (defun send-selection-to-jarvis () 66 | (interactive) 67 | (if (use-region-p) 68 | (write-region (region-beginning) (region-end) "/tmp/jarvis-chatgpt-input.txt" 0))) 69 | (global-set-key (kbd "") 'send-selection-to-jarvis) 70 | ``` 71 | 72 | # using jarvis.py outside of emacs 73 | 74 | The interface for Jarvis is quite simple: it listens for the F12 keypress, records audio input, transcribes it, and sends the resulting text to the ChatGPT model to get an answer. Additionally, if there is any text in "/tmp/jarvis-chatgpt-input.txt", it will be appended to the question being asked. 
75 | 76 | The format of the question is as follows: 77 | 78 | ``` 79 | You are the best software developer in the world, most experienced in go and python, answer the following question: 80 | 81 | {transcribed question from the microphone} 82 | {data from jarvis-chatgpt-input.txt} 83 | ``` 84 | 85 | The output from ChatGPT is saved to `/tmp/jarvis-chatgpt.txt`, which is overwritten one chunk at a time. You can use inotify to monitor this file and re-read its contents as needed. 86 | 87 | # How it looks 88 | 89 | ![screenshot.png](screenshot.png) 90 | 91 | > Don't judge my emacs theme. 92 | 93 | # BUGS 94 | 95 | Because openai does not have an API, chatgpt-wrapper uses the active firefox session via playwright, which is not super reliable; sometimes you might have to restart jarvis.py, so if you can, get a key from openai and use `jarvis-chatgpt-api.py` 96 | --------------------------------------------------------------------------------