├── README.md
├── listener.rb
└── spec
    └── listener.spec.rb


/README.md:
--------------------------------------------------------------------------------
 1 | # gpt-4-listen-and-summarize
 2 | 
 3 | This Ruby script will watch a folder for new text files, summarize them with GPT (using gpt-4, gpt-3.5-turbo, or another model of your choosing, by updating `@model` in the script), and save them to a different folder.
 4 | 
 5 | Breaks down very large text files into multiple smaller chunks (to get around the API's "max length") and stitches together the responses for you.
 6 | 
 7 | I save mine right to my Obsidian vault but you might have other use cases.
 8 | 
 9 | ## Setup
10 | 
11 | 1. `gem install listen`
12 | 2. You'll need an [OpenAI API key](https://platform.openai.com/account/api-keys), set as the environment variable `OPENAI_API_KEY`.
13 | 3. `ruby listener.rb`
14 | 
15 | Built using Ruby 3.2.1
16 | 
17 | ```
18 | ruby 3.2.1 (2023-02-08 revision 31819e82c8) [x86_64-darwin21]
19 | ```
20 | 


--------------------------------------------------------------------------------
/listener.rb:
--------------------------------------------------------------------------------
  1 | require "listen"
  2 | require "net/http"
  3 | require "uri"
  4 | require "json"
  5 | 
  6 | # options:
  7 | @directory_to_watch = "/some/input/directory"
  8 | @directory_to_write = "/some/output/directory"
  9 | @append_original_text = true
 10 | @api_key = ENV["OPENAI_API_KEY"]
 11 | # use 'gpt-4', 'gpt-3.5-turbo', etc
 12 | @model = "gpt-3.5-turbo"
 13 | 
 14 | # code:
 15 | def generate_file_name
 16 |   Time.now.strftime("%Y-%m-%d-%H-%M-%S") + ".md"
 17 | end
 18 | 
 19 | def request_gpt(chunk)
 20 |   uri = URI("https://api.openai.com/v1/chat/completions")
 21 |   request = Net::HTTP::Post.new(uri)
 22 |   request["Authorization"] = "Bearer #{@api_key}"
 23 |   request["Content-Type"] = "application/json"
 24 |   request.body = JSON.dump({
 25 |     "messages" => [
 26 |       { "role": "system", "content": "You are a helpful assistant that transcribes large text into summaries. Expound on your summaries with detailed paragraphs, pulling out key moments in the text. Keep your responses under 2000 characters" },
 27 |       { "role": "user", "content": "Please summarize this into a few long paragraphs, under 2000 characters total:\n\n #{chunk}." },
 28 |     ],
 29 |     "model" => @model,
 30 |     "temperature" => 0.5,
 31 |     "max_tokens" => 400,
 32 |   })
 33 | 
 34 |   puts "calling API with a chunk"
 35 | 
 36 |   response = Net::HTTP.start(uri.hostname, uri.port, use_ssl: true) do |http|
 37 |     http.request(request)
 38 |   end
 39 | 
 40 |   JSON.parse(response.body)
 41 | end
 42 | 
 43 | def split_content(content)
 44 |   content.scan(/.{1,4800}/)
 45 | end
 46 | 
 47 | def process_file(file_path)
 48 |   content = File.read(file_path).gsub(/[\r\n]+/, " ")
 49 |   chunks = split_content(content)
 50 | 
 51 |   # In the process_file method
 52 |   gpt_responses = chunks.map do |chunk|
 53 |     puts "Sending chunk to #{@model} API"
 54 |     response = request_gpt(chunk)
 55 |     puts "Received chunk's #{@model} API response"
 56 |     response
 57 |   end
 58 | 
 59 |   # Before combining the responses
 60 |   puts "Combining #{@model} responses"
 61 |   result_content = gpt_responses.map do |response|
 62 |     if response["choices"] && response["choices"][0] && response["choices"][0]["message"] && response["choices"][0]["message"]["content"]
 63 |       response["choices"][0]["message"]["content"]
 64 |     else
 65 |       puts "Unexpected #{@model} response: #{response}"
 66 |       ""
 67 |     end
 68 |   end.join
 69 | 
 70 |   puts "Writing result to file"
 71 |   file_name = generate_file_name
 72 | 
 73 |   # Add metadata and Topics line to the output file
 74 |   metadata = <<~METADATA
 75 |     ---
 76 |     create_date: #{file_name.gsub(".md", "")}
 77 |     ---
 78 | 
 79 |     Topics:: [[Journal]], [[GPT Summaries]]
 80 | 
 81 |     ---
 82 | 
 83 |   METADATA
 84 | 
 85 |   if @append_original_text
 86 |     formatted_content = metadata + result_content + "\n\n---\n\nOriginal Text:\n\n" + content
 87 |   else
 88 |     formatted_content = metadata + result_content
 89 |   end
 90 | 
 91 |   # save file to disk
 92 |   File.write(File.join(@directory_to_write, file_name), formatted_content)
 93 |   puts "Done"
 94 | end
 95 | 
 96 | listener = Listen.to(@directory_to_watch) do |modified, added, removed|
 97 |   added.each do |added_file|
 98 |     if File.extname(added_file) == ".txt"
 99 |       process_file(added_file)
100 |     end
101 |   end
102 | end
103 | 
104 | puts "listening for files in #{@directory_to_watch}"
105 | listener.start
106 | sleep
107 | 


--------------------------------------------------------------------------------
/spec/listener.spec.rb:
--------------------------------------------------------------------------------
 1 | require "minitest/autorun"
 2 | require "minitest/mock"
 3 | require "tempfile"
 4 | 
 5 | class TestTranscriptionScript < Minitest::Test
 6 |   def test_generate_file_name
 7 |     file_name = generate_file_name
 8 |     assert_match(/^\d{4}-\d{2}-\d{2}-\d{2}-\d{2}-\d{2}\.md$/, file_name, "File name format is incorrect")
 9 |   end
10 | 
11 |   def test_split_content
12 |     long_text = "a" * 10000
13 |     chunks = split_content(long_text)
14 |     assert_equal(3, chunks.size, "Split content should create 3 chunks")
15 |     assert_equal(4800, chunks[0].size, "First chunk should have 4800 characters")
16 |     assert_equal(4800, chunks[1].size, "Second chunk should have 4800 characters")
17 |     assert_equal(400, chunks[2].size, "Third chunk should have 400 characters")
18 |   end
19 | 
20 |   def test_process_file
21 |     # Create a temporary input file
22 |     input_file = Tempfile.new("test_input.txt")
23 |     input_file.write("Example content")
24 |     input_file.rewind
25 | 
26 |     # Mock API response
27 |     mock_api_response = {
28 |       "choices" => [
29 |         {
30 |           "message" => {
31 |             "role" => "assistant",
32 |             "content" => "Mock summary",
33 |           },
34 |         },
35 |       ],
36 |     }
37 | 
38 |     # Mock request_gpt4 function
39 |     mock_request_gpt4 = Minitest::Mock.new
40 |     mock_request_gpt4.expect(:call, mock_api_response, [String])
41 | 
42 |     # Mock File.write to prevent writing to the actual file system
43 |     File.stub(:write, nil) do
44 |       process_file(input_file.path, mock_request_gpt4)
45 |     end
46 | 
47 |     # Ensure the mock_request_gpt4 function was called as expected
48 |     mock_request_gpt4.verify
49 | 
50 |     input_file.close
51 |     input_file.unlink
52 |   end
53 | end
54 | 


--------------------------------------------------------------------------------