├── README.md ├── listener.rb └── spec └── listener.spec.rb /README.md: -------------------------------------------------------------------------------- 1 | # gpt-4-listen-and-summarize 2 | 3 | This Ruby script will watch a folder for new text files, summarize them with GPT (using gpt-4, gpt-3.5-turbo, or another model of your choosing, by updating `@model` in the script), and save them to a different folder. 4 | 5 | Breaks down very large text files into multiple smaller chunks (to get around the API's "max length") and stitches together the responses for you. 6 | 7 | I save mine right to my Obsidian vault but you might have other use cases. 8 | 9 | ## Setup 10 | 11 | 1. `gem install listen` 12 | 2. You'll need an [OpenAI API key](https://platform.openai.com/account/api-keys), set as an env variable (`OPENAI_API_KEY`) 13 | 3. `ruby listener.rb` 14 | 15 | Built using Ruby 3.2.1 16 | 17 | ``` 18 | ruby 3.2.1 (2023-02-08 revision 31819e82c8) [x86_64-darwin21] 19 | ``` 20 | -------------------------------------------------------------------------------- /listener.rb: -------------------------------------------------------------------------------- 1 | require "listen" 2 | require "net/http" 3 | require "uri" 4 | require "json" 5 | 6 | # options: 7 | @directory_to_watch = "/some/input/directory" 8 | @directory_to_write = "/some/output/directory" 9 | @append_original_text = true 10 | @api_key = ENV["OPENAI_API_KEY"] 11 | # use 'gpt-4', 'gpt-3.5-turbo', etc 12 | @model = "gpt-3.5-turbo" 13 | 14 | # code: 15 | def generate_file_name 16 | Time.now.strftime("%Y-%m-%d-%H-%M-%S") + ".md" 17 | end 18 | 19 | def request_gpt(chunk) 20 | uri = URI("https://api.openai.com/v1/chat/completions") 21 | request = Net::HTTP::Post.new(uri) 22 | request["Authorization"] = "Bearer #{@api_key}" 23 | request["Content-Type"] = "application/json" 24 | request.body = JSON.dump({ 25 | "messages" => [ 26 | { "role": "system", "content": "You are a helpful assistant that transcribes large text into 
summaries. Expound on your summaries with detailed paragraphs, pulling out key moments in the text. Keep your responses under 2000 characters" }, 27 | { "role": "user", "content": "Please summarize this into a few long paragraphs, under 2000 characters total:\n\n #{chunk}." }, 28 | ], 29 | "model" => @model, 30 | "temperature" => 0.5, 31 | "max_tokens" => 400, 32 | }) 33 | 34 | puts "calling API with a chunk" 35 | 36 | response = Net::HTTP.start(uri.hostname, uri.port, use_ssl: true) do |http| 37 | http.request(request) 38 | end 39 | 40 | JSON.parse(response.body) 41 | end 42 | 43 | def split_content(content) 44 | content.scan(/.{1,4800}/) 45 | end 46 | 47 | def process_file(file_path) 48 | content = File.read(file_path).gsub(/[\r\n]+/, " ") 49 | chunks = split_content(content) 50 | 51 | # In the process_file method 52 | gpt_responses = chunks.map do |chunk| 53 | puts "Sending chunk to #{@model} API" 54 | response = request_gpt(chunk) 55 | puts "Received chunk's #{@model} API response" 56 | response 57 | end 58 | 59 | # Before combining the responses 60 | puts "Combining #{@model} responses" 61 | result_content = gpt_responses.map do |response| 62 | if response["choices"] && response["choices"][0] && response["choices"][0]["message"] && response["choices"][0]["message"]["content"] 63 | response["choices"][0]["message"]["content"] 64 | else 65 | puts "Unexpected #{@model} response: #{response}" 66 | "" 67 | end 68 | end.join 69 | 70 | puts "Writing result to file" 71 | file_name = generate_file_name 72 | 73 | # Add metadata and Topics line to the output file 74 | metadata = <<~METADATA 75 | --- 76 | create_date: #{file_name.gsub(".md", "")} 77 | --- 78 | 79 | Topics:: [[Journal]], [[GPT Summaries]] 80 | 81 | --- 82 | 83 | METADATA 84 | 85 | if @append_original_text 86 | formatted_content = metadata + result_content + "\n\n---\n\nOriginal Text:\n\n" + content 87 | else 88 | formatted_content = metadata + result_content 89 | end 90 | 91 | # save file to disk 92 | 
File.write(File.join(@directory_to_write, file_name), formatted_content) 93 | puts "Done" 94 | end 95 | 96 | listener = Listen.to(@directory_to_watch) do |modified, added, removed| 97 | added.each do |added_file| 98 | if File.extname(added_file) == ".txt" 99 | process_file(added_file) 100 | end 101 | end 102 | end 103 | 104 | puts "listening for files in #{@directory_to_watch}" 105 | listener.start 106 | sleep 107 | -------------------------------------------------------------------------------- /spec/listener.spec.rb: -------------------------------------------------------------------------------- 1 | require "minitest/autorun" 2 | require "minitest/mock" 3 | require "tempfile" 4 | 5 | class TestTranscriptionScript < Minitest::Test 6 | def test_generate_file_name 7 | file_name = generate_file_name 8 | assert_match(/^\d{4}-\d{2}-\d{2}-\d{2}-\d{2}-\d{2}\.md$/, file_name, "File name format is incorrect") 9 | end 10 | 11 | def test_split_content 12 | long_text = "a" * 10000 13 | chunks = split_content(long_text) 14 | assert_equal(3, chunks.size, "Split content should create 3 chunks") 15 | assert_equal(4800, chunks[0].size, "First chunk should have 4800 characters") 16 | assert_equal(4800, chunks[1].size, "Second chunk should have 4800 characters") 17 | assert_equal(400, chunks[2].size, "Third chunk should have 400 characters") 18 | end 19 | 20 | def test_process_file 21 | # Create a temporary input file 22 | input_file = Tempfile.new("test_input.txt") 23 | input_file.write("Example content") 24 | input_file.rewind 25 | 26 | # Mock API response 27 | mock_api_response = { 28 | "choices" => [ 29 | { 30 | "message" => { 31 | "role" => "assistant", 32 | "content" => "Mock summary", 33 | }, 34 | }, 35 | ], 36 | } 37 | 38 | # Mock request_gpt4 function 39 | mock_request_gpt4 = Minitest::Mock.new 40 | mock_request_gpt4.expect(:call, mock_api_response, [String]) 41 | 42 | # Mock File.write to prevent writing to the actual file system 43 | File.stub(:write, nil) do 44 | 
process_file(input_file.path, mock_request_gpt4) 45 | end 46 | 47 | # Ensure the mock_request_gpt4 function was called as expected 48 | mock_request_gpt4.verify 49 | 50 | input_file.close 51 | input_file.unlink 52 | end 53 | end 54 | --------------------------------------------------------------------------------