├── screenshot.png ├── add-to-init.el ├── jarvis.py ├── jarvis-chatgpt-api.py └── README.md /screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jackdoe/emacs-chatgpt-jarvis/HEAD/screenshot.png -------------------------------------------------------------------------------- /add-to-init.el: -------------------------------------------------------------------------------- 1 | (write-region "" nil "/tmp/jarvis-chatgpt.txt") 2 | 3 | (require 'filenotify) 4 | (generate-new-buffer "CHATGPT") 5 | 6 | (defun my-jarvis-callback (event) 7 | (with-current-buffer "CHATGPT" 8 | (erase-buffer) 9 | (insert-file-contents "/tmp/jarvis-chatgpt.txt" nil 0 5000) 10 | (goto-char (point-max)))) 11 | 12 | (file-notify-add-watch 13 | "/tmp/jarvis-chatgpt.txt" '(change) 'my-jarvis-callback) 14 | 15 | (defun send-selection-to-jarvis () 16 | (interactive) 17 | (if (use-region-p) 18 | (write-region (region-beginning) (region-end) "/tmp/jarvis-chatgpt-input.txt" 0))) 19 | (global-set-key (kbd "") 'send-selection-to-jarvis) 20 | -------------------------------------------------------------------------------- /jarvis.py: -------------------------------------------------------------------------------- 1 | from pyaudio import PyAudio, paInt16 2 | import wave, whisper, os, time 3 | from pynput import keyboard 4 | from chatgpt_wrapper import ChatGPT 5 | 6 | LISTEN = False 7 | OUTPUT = "/tmp/jarvis-chatgpt.txt" 8 | RECORDING_FILE = "/tmp/jarvis-chatgpt.wav" 9 | EXTRA_INPUT = "/tmp/jarvis-chatgpt-input.txt" 10 | 11 | def on_press(key): 12 | global LISTEN 13 | if key == keyboard.Key.f12: 14 | LISTEN = True 15 | 16 | def on_release(key): 17 | global LISTEN 18 | if key == keyboard.Key.f12: 19 | LISTEN = False 20 | 21 | def out(t): 22 | with open(OUTPUT, "w") as f: 23 | f.write(t) 24 | 25 | def read_extra_file(): 26 | data = '' 27 | try: 28 | with open(EXTRA_INPUT, "r") as f: 29 | data = f.read() 30 | except: 31 | pass 32 | 
finally: 33 | try: 34 | os.remove(EXTRA_INPUT) 35 | except: 36 | pass 37 | return data 38 | 39 | def microphone(name, seconds): 40 | with wave.open(name, 'wb') as wf: 41 | p = PyAudio() 42 | wf.setnchannels(1) 43 | sample = p.get_sample_size(paInt16) 44 | wf.setsampwidth(sample) 45 | wf.setframerate(44100) 46 | 47 | stream = p.open(format=paInt16,channels=1,rate=44100,input=True) 48 | 49 | chunks = 44100//1024*seconds 50 | for _ in range(0, chunks): 51 | wf.writeframes(stream.read(1024)) 52 | if not LISTEN: 53 | break 54 | 55 | stream.close() 56 | p.terminate() 57 | 58 | listener = keyboard.Listener(on_press=on_press,on_release=on_release) 59 | listener.start() 60 | 61 | bot = ChatGPT() 62 | model = whisper.load_model("medium.en") 63 | out("waiting, pres f12 to ask a question, region selection will be appended...") 64 | print('...') 65 | while True: 66 | if LISTEN: 67 | question = '' 68 | try: 69 | out("listening...") 70 | t0 = time.time() 71 | microphone(RECORDING_FILE, 60) 72 | if time.time() - t0 > 1: 73 | out("transcribing...") 74 | r = model.transcribe(RECORDING_FILE) 75 | question = r["text"] 76 | else: 77 | question = '' 78 | finally: 79 | try: 80 | os.remove(RECORDING_FILE) 81 | except: 82 | pass 83 | extra = read_extra_file() 84 | out(f"decoded: {question}\n{extra}\nasking chatgpt...") 85 | 86 | bot.refresh_session() 87 | stream = bot.ask_stream(f"""You are the best software developer in the world, most experienced in go and python, answer the following question: 88 | 89 | {question} 90 | {extra} 91 | """) 92 | 93 | response = f"# QUESTION:\n{question}\n{extra}\n# CHARTPT START\n" 94 | for chunk in stream: 95 | response += chunk 96 | out(response) 97 | response += '\n# CHATGPT END\n' 98 | out(response) 99 | 100 | time.sleep(0.01) 101 | -------------------------------------------------------------------------------- /jarvis-chatgpt-api.py: -------------------------------------------------------------------------------- 1 | import openai 2 | from pyaudio 
import PyAudio, paInt16 3 | import wave, whisper, os, time 4 | from pynput import keyboard 5 | from threading import Thread,Event 6 | from itertools import cycle 7 | import traceback 8 | 9 | LISTEN = False 10 | OUTPUT = "/tmp/jarvis-chatgpt.txt" 11 | RECORDING_FILE = "/tmp/jarvis-chatgpt.wav" 12 | EXTRA_INPUT = "/tmp/jarvis-chatgpt-input.txt" 13 | DONE = Event() 14 | openai.api_key = os.getenv("OPENAI_API_KEY") 15 | 16 | def on_press(key): 17 | global LISTEN 18 | if key == keyboard.Key.f12: 19 | LISTEN = True 20 | 21 | def on_release(key): 22 | global LISTEN 23 | if key == keyboard.Key.f12: 24 | LISTEN = False 25 | 26 | def out(t): 27 | with open(OUTPUT, "w") as f: 28 | f.write(t) 29 | 30 | def read_extra_file(): 31 | data = '' 32 | try: 33 | with open(EXTRA_INPUT, "r") as f: 34 | data = f.read() 35 | except: 36 | pass 37 | finally: 38 | try: 39 | os.remove(EXTRA_INPUT) 40 | except: 41 | pass 42 | return data 43 | 44 | def microphone(name, seconds): 45 | with wave.open(name, 'wb') as wf: 46 | p = PyAudio() 47 | wf.setnchannels(1) 48 | sample = p.get_sample_size(paInt16) 49 | wf.setsampwidth(sample) 50 | wf.setframerate(44100) 51 | 52 | stream = p.open(format=paInt16,channels=1,rate=44100,input=True) 53 | 54 | chunks = 44100//1024*seconds 55 | for _ in range(0, chunks): 56 | wf.writeframes(stream.read(1024)) 57 | if not LISTEN: 58 | break 59 | 60 | stream.close() 61 | p.terminate() 62 | 63 | def waiting(question, extra): 64 | spinner = cycle(list('|/-\\')) 65 | while not DONE.is_set(): 66 | out(f"decoded: {question}\n{extra}\nasking chatgpt... 
" + next(spinner)) 67 | DONE.wait(timeout=0.1) 68 | 69 | listener = keyboard.Listener(on_press=on_press,on_release=on_release) 70 | listener.start() 71 | 72 | 73 | model = whisper.load_model("medium.en") 74 | out("waiting, pres f12 to ask a question, region selection will be appended...") 75 | print('...') 76 | 77 | while True: 78 | if LISTEN: 79 | question = '' 80 | try: 81 | out("listening...") 82 | t0 = time.time() 83 | microphone(RECORDING_FILE, 60) 84 | if time.time() - t0 > 1: 85 | out("transcribing...") 86 | r = model.transcribe(RECORDING_FILE) 87 | question = r["text"] 88 | else: 89 | question = '' 90 | finally: 91 | try: 92 | os.remove(RECORDING_FILE) 93 | except: 94 | pass 95 | 96 | extra = read_extra_file() 97 | 98 | DONE.clear() 99 | t0 = Thread(target=waiting, args=(question, extra,)) 100 | t0.start() 101 | 102 | response = f"# QUESTION:\n{question}\n{extra}\n# CHATGPT START\n" 103 | 104 | try: 105 | chatgpt_request = f"{question}\n{extra}" 106 | completion = openai.ChatCompletion.create( 107 | model="gpt-3.5-turbo", 108 | messages=[ 109 | {"role": "system", "content": "You are the best software developer in the world, most experienced in go and python, answer the following question:"}, 110 | {"role": "user", "content": chatgpt_request} 111 | ] 112 | ) 113 | 114 | response += completion.choices[0].message.content 115 | response += '\n# CHATGPT END\n' 116 | 117 | except Exception as e: 118 | exception_stack = traceback.format_exc() 119 | response = f"Error: {str(e)}\n\n{exception_stack}" 120 | finally: 121 | DONE.set() 122 | t0.join() 123 | out(response) 124 | 125 | time.sleep(0.01) 126 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PROOF OF CONCEPT 2 | 3 | > I always dreamed of not leaving emacs, with eww and chatgpt I am closer than ever. 
4 | 5 | This is a proof of concept program that allows you to use voice to interact with chatgpt and see the result in emacs. 6 | 7 | To transcribe your speech using OpenAI's Whisper (it uses it locally, your voice is not sent anywhere), press the F12 key to start recording. Whisper will continue to transcribe your speech until you release the F12 key. After recording, ask ChatGPT for assistance and print the output in the buffer. 8 | 9 | __It stores the recording (up to 60 seconds) in /tmp/jarvis-chatgpt.wav__ 10 | 11 | Youtube videos showing how it feels: 12 | 13 | * using the chatgpt api (instead of the headless browser wrapper) to explain and refactor code: 14 | 15 | [![example explain and refactor code](https://img.youtube.com/vi/KX0ZaXcgXNc/0.jpg)](https://www.youtube.com/watch?v=KX0ZaXcgXNc "example explain and refactor") 16 | 17 | * asking a question: 18 | 19 | [![example question](https://img.youtube.com/vi/P-5RBdM9X-8/0.jpg)](https://www.youtube.com/watch?v=P-5RBdM9X-8 "example question") 20 | 21 | * refactor code 22 | 23 | [![example refactor](https://img.youtube.com/vi/uWJ8-JU0aXY/0.jpg)](https://www.youtube.com/watch?v=uWJ8-JU0aXY "example refactor") 24 | 25 | * rewrite text 26 | 27 | [![example rewrite](https://img.youtube.com/vi/4Jyhs6SfFl0/0.jpg)](https://www.youtube.com/watch?v=4Jyhs6SfFl0 "example rewrite") 28 | 29 | # Install 30 | 31 | The whisper/pyaudio/chatgpt-wrapper are a bit more involved than pip install, whisper needs ffmpeg for example, so it's best to follow the instructions on their homepages: 32 | 33 | * pip install pynput 34 | * install https://github.com/openai/whisper 35 | * install https://pypi.org/project/PyAudio/ (on Windows it's just pip install pyaudio) 36 | * install https://github.com/mmabrouk/chatgpt-wrapper or `pip install openai` if you have an api key from openai: https://platform.openai.com/account/api-keys 37 | 38 | Edit jarvis.py if you want to use another key 39 | 40 | # Running and Using jarvis 41 | 42 | * If you have 
an API key, export it as OPENAI_API_KEY and use `jarvis-chatgpt-api.py` instead of `jarvis.py` 43 | * Run `python jarvis.py` in your terminal. Note that the first time you run it, the `medium.en` model, which is 1.4GB in size, will be downloaded. It may take up to a minute to load the model. 44 | * Open Emacs and navigate to the CHATGPT buffer. 45 | * Press the F12 key to ask a question. If you have a region of text selected, it will be saved to `/tmp/jarvis-chatgpt-input.txt` and appended to your question. For example, if you want to ask Jarvis to "refactor this code", select the code and then press F12. 46 | * If you press F12 for less than a second it will just send the selected region to ChatGPT. 47 | 48 | Add this to your init.el in order to keep watching the jarvis-chatgpt.txt file: 49 | 50 | ``` 51 | (write-region "" nil "/tmp/jarvis-chatgpt.txt") 52 | 53 | (require 'filenotify) 54 | (generate-new-buffer "CHATGPT") 55 | 56 | (defun my-jarvis-callback (event) 57 | (with-current-buffer "CHATGPT" 58 | (erase-buffer) 59 | (insert-file-contents "/tmp/jarvis-chatgpt.txt" nil 0 5000) 60 | (goto-char (point-max)))) 61 | 62 | (file-notify-add-watch 63 | "/tmp/jarvis-chatgpt.txt" '(change) 'my-jarvis-callback) 64 | 65 | (defun send-selection-to-jarvis () 66 | (interactive) 67 | (if (use-region-p) 68 | (write-region (region-beginning) (region-end) "/tmp/jarvis-chatgpt-input.txt" 0))) 69 | (global-set-key (kbd "") 'send-selection-to-jarvis) 70 | ``` 71 | 72 | # using jarvis.py outside of emacs 73 | 74 | The interface for Jarvis is quite simple: it listens for the F12 keypress, records audio input, transcribes it, and sends the resulting text to the ChatGPT model to get an answer. Additionally, if there is any text in "/tmp/jarvis-chatgpt-input.txt", it will be appended to the question being asked. 
75 | 76 | The format of the question is as follows: 77 | 78 | ``` 79 | You are the best software developer in the world, most experienced in go and python, answer the following question: 80 | 81 | {transcribed question from the microphone} 82 | {data from jarvis-chatgpt-input.txt} 83 | ``` 84 | 85 | The output from ChatGPT is saved to `/tmp/jarvis-chatgpt.txt`, which is overwritten one chunk at a time. You can use inotify to monitor this file and re-read its contents as needed. 86 | 87 | # How it looks 88 | 89 | ![screenshot.png](screenshot.png) 90 | 91 | > Don't judge my emacs theme. 92 | 93 | # BUGS 94 | 95 | Because openai does not have an API, chatgpt-wrapper uses the active firefox session via playwright, which is not super reliable; sometimes you might have to restart jarvis.py, so if you can, get a key from openai and use `jarvis-chatgpt-api.py` 96 | --------------------------------------------------------------------------------