├── .gitignore
├── .tool-versions
├── Gemfile
├── Gemfile.lock
├── LICENSE
├── README.md
├── ai_template.rb
├── ai_templates
    ├── determine_best_link_url.txt
    ├── instructions_to_commands.txt
    ├── overall_best_answer.txt
    ├── page_command_question.txt
    └── summarize_session.txt
├── commander.rb
├── html_cleaner.rb
└── run.rb


/.gitignore:
--------------------------------------------------------------------------------
 1 | *.gem
 2 | *.rbc
 3 | /.config
 4 | /coverage/
 5 | /InstalledFiles
 6 | /pkg/
 7 | /spec/reports/
 8 | /spec/examples.txt
 9 | /test/tmp/
10 | /test/version_tmp/
11 | /tmp/
12 | 
13 | # Used by dotenv library to load environment variables.
14 | .env
15 | 
16 | .DS_Store
17 | 
18 | # Ignore Byebug command history file.
19 | .byebug_history
20 | 
21 | ## Specific to RubyMotion:
22 | .dat*
23 | .repl_history
24 | build/
25 | *.bridgesupport
26 | build-iPhoneOS/
27 | build-iPhoneSimulator/
28 | 
29 | ## Specific to RubyMotion (use of CocoaPods):
30 | #
31 | # We recommend against adding the Pods directory to your .gitignore. However
32 | # you should judge for yourself, the pros and cons are mentioned at:
33 | # https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control
34 | #
35 | # vendor/Pods/
36 | 
37 | ## Documentation cache and generated files:
38 | /.yardoc/
39 | /_yardoc/
40 | /doc/
41 | /rdoc/
42 | 
43 | ## Environment normalization:
44 | /.bundle/
45 | /vendor/bundle
46 | /lib/bundler/man/
47 | 
48 | # for a library or gem, you might want to ignore these files since the code is
49 | # intended to run in multiple environments; otherwise, check them in:
50 | # Gemfile.lock
51 | # .ruby-version
52 | # .ruby-gemset
53 | 
54 | # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
55 | .rvmrc
56 | 
57 | # Used by RuboCop. Remote config files pulled in from inherit_from directive.
58 | # .rubocop-https?--*
59 | 


--------------------------------------------------------------------------------
/.tool-versions:
--------------------------------------------------------------------------------
1 | ruby 2.6.8
2 | nodejs 16.14.0


--------------------------------------------------------------------------------
/Gemfile:
--------------------------------------------------------------------------------
 1 | source 'https://rubygems.org'
 2 | gem "activesupport"
 3 | gem "awesome_print"
 4 | gem "colorize" # colors and bold in terminal output
 5 | gem "dotenv"
 6 | gem "httparty"
 7 | gem "nokogiri"
 8 | gem "puppeteer-ruby"
 9 | gem "rmagick"
10 | gem "similar_text" # fuzzy match strings by their similarity
11 | gem "tty-box" # boxes in terminal output
12 | gem "tty-font" # large fonts in terminal output
13 | gem "tty-screen" # learn about current terminal size
14 | 
15 | 


--------------------------------------------------------------------------------
/Gemfile.lock:
--------------------------------------------------------------------------------
 1 | GEM
 2 |   remote: https://rubygems.org/
 3 |   specs:
 4 |     activesupport (5.2.8)
 5 |       concurrent-ruby (~> 1.0, >= 1.0.2)
 6 |       i18n (>= 0.7, < 2)
 7 |       minitest (~> 5.1)
 8 |       tzinfo (~> 1.1)
 9 |     awesome_print (1.9.2)
10 |     colorize (0.8.1)
11 |     concurrent-ruby (1.1.10)
12 |     dotenv (2.7.6)
13 |     httparty (0.20.0)
14 |       mime-types (~> 3.0)
15 |       multi_xml (>= 0.5.2)
16 |     i18n (1.10.0)
17 |       concurrent-ruby (~> 1.0)
18 |     mime-types (3.4.1)
19 |       mime-types-data (~> 3.2015)
20 |     mime-types-data (3.2022.0105)
21 |     mini_portile2 (2.8.0)
22 |     minitest (5.15.0)
23 |     multi_xml (0.6.0)
24 |     nokogiri (1.13.8)
25 |       mini_portile2 (~> 2.8.0)
26 |       racc (~> 1.4)
27 |     pastel (0.8.0)
28 |       tty-color (~> 0.5)
29 |     puppeteer-ruby (0.40.7)
30 |       concurrent-ruby (~> 1.1.0)
31 |       mime-types (>= 3.0)
32 |       websocket-driver (>= 0.6.0)
33 |     racc (1.6.0)
34 |     rmagick (4.3.0)
35 |     similar_text (0.0.4)
36 |     strings (0.2.1)
37 |       strings-ansi (~> 0.2)
38 |       unicode-display_width (>= 1.5, < 3.0)
39 |       unicode_utils (~> 1.4)
40 |     strings-ansi (0.2.0)
41 |     thread_safe (0.3.6)
42 |     tty-box (0.7.0)
43 |       pastel (~> 0.8)
44 |       strings (~> 0.2.0)
45 |       tty-cursor (~> 0.7)
46 |     tty-color (0.6.0)
47 |     tty-cursor (0.7.1)
48 |     tty-font (0.5.0)
49 |     tty-screen (0.8.1)
50 |     tzinfo (1.2.9)
51 |       thread_safe (~> 0.1)
52 |     unicode-display_width (2.1.0)
53 |     unicode_utils (1.4.0)
54 |     websocket-driver (0.7.5)
55 |       websocket-extensions (>= 0.1.0)
56 |     websocket-extensions (0.1.5)
57 | 
58 | PLATFORMS
59 |   ruby
60 | 
61 | DEPENDENCIES
62 |   activesupport
63 |   awesome_print
64 |   colorize
65 |   dotenv
66 |   httparty
67 |   nokogiri
68 |   puppeteer-ruby
69 |   rmagick
70 |   similar_text
71 |   tty-box
72 |   tty-font
73 |   tty-screen
74 | 
75 | BUNDLED WITH
76 |    2.1.2
77 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2022 Joe Heitzeberg
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # ruby-gpt3-commander
 2 | 
 3 | This is Ruby code for controlling a browser using GPT-3.
 4 | 
 5 | ### INITIAL SETUP:
 6 | 
 7 | Run `bundle install` to install the dependencies. You'll need Puppeteer (headless Chrome), and that might be a NodeJS thing.
 8 | 
 9 | Create a `.env` file and add `OPEN_AI_SECRET_KEY=xxxxxx` where `xxxxxx` is your GPT-3 API key. You'll need the Codex models if you want to run things out of the box.
10 | 
11 | ### HOW THIS WORKS
12 | 
13 | `run.rb` is a simple terminal application where you can enter your goal, like "what are the most expensive shoes in the world" and the AI will figure out how to use a web browser to get the result on its own.
14 | 
15 | `ai_template.rb` implements a very simple way to read parameterized GPT-3 prompts from the file system under `/ai_templates` which contain all the standard GPT-3 params and {{anything}}, {{you}}, {{want}} as, you guessed it, {{mustache}} variables. The class `AiTemplate` has methods for passing values to your templates to replace those variables and call OpenAI's API for you.
16 | 
17 | `commander.rb` implements a very narrow set of browser commands -- go, click, search and question -- and executes those commands. In most cases, it uses GPT-3 to help form the commands and to execute them -- using simple prompt chaining.
18 | 
19 | `html_cleaner.rb` is a class to take HTML and simplify it so it's less verbose. This is important because GPT-3 prompts are limited in their size.
20 | 
21 | 
22 | ### RUNNING THIS
23 | 
24 | Just run `ruby run.rb` and follow the prompts.
25 | 
26 | ### DISCLAIMERS / ASKS
27 | 
28 | It would be pretty awesome if someone who understands Puppeteer better than I do would fix and build up those aspects, to add more functionality. It would be pretty powerful to have the AI be able to fill out forms and such.
29 | 
30 | It probably won't be long until you run into this: https://www.perimeterx.com/downloads/product-briefs/PerimeterX-Product-Brief-Human-Challenge.pdf
31 | 
32 | My personal website: https://currentlyobsessed.com/


--------------------------------------------------------------------------------
/ai_template.rb:
--------------------------------------------------------------------------------
  1 | # This class represents a GPT-3 prompt but with simple {{slugs}}
  2 | # so that one can easily replace them with the appropriate values
  3 | # at runtime.  This allows for useful prompt reuse with dynamic inputs.
  4 | #
  5 | # This class also handles calling into GPT-3 to get the results
  6 | # and loading the AI Template data from flat files (in the /ai_templates dir).
  7 | 
  8 | class AiTemplate
  9 |   # Each AI Template has a few attributes:
 10 |   # - name: the name of the template.
 11 |   # - token: a unique short identifier for the template which is also the file name and key.
 12 |   # - description: a short description of the template for humans.
 13 |   # - temperature: the GPT-3 temperature to use for this prompt.
 14 |   # - engine: the GPT-3 engine to use for this prompt.
 15 |   # - n: the GPT-3 number of result to return for this prompt.
 16 |   # - top_p: the GPT-3 top_p to use for this prompt.
 17 |   # - frequency_penalty: the GPT-3 frequency_penalty to use for this prompt.
 18 |   # - presence_penalty: the GPT-3 presence_penalty to use for this prompt.
 19 |   # - stop_strs: stop strings separated by "~".
 20 |   #
 21 |   # Create those attributes:
 22 |   attr_accessor :name, :token, :description, :temperature, :engine, :n, :top_p, :frequency_penalty, :presence_penalty, :max_tokens, :stop_strs, :prompt
 23 |   
 24 |   def self.run!(token:, params:)
 25 |     AiTemplate.load(token: token).run(params: params)
 26 |   end
 27 | 
 28 |   # Run the template with the provided parameters
 29 |   def run(params:)
 30 |     prompt_replaced = replace_params(params: params)
 31 | 
 32 |     # call into GPT-3
 33 |     open_ai_gtp3_url = "https://api.openai.com/v1/engines/#{self.engine}/completions"
 34 | 
 35 |     stop_strs_array = nil
 36 |     stop_strs_array = self.stop_strs.split("~").map {|s| s.gsub("\\n", "\n")} if self.stop_strs.present?
 37 | 
 38 |     open_ai_params = {
 39 |       prompt: prompt_replaced.strip,
 40 |       temperature: self.temperature.to_f,
 41 |       stop: stop_strs_array,
 42 |       top_p: self.top_p.to_f,
 43 |       n: self.n.to_i,
 44 |       stream: false,
 45 |       presence_penalty: self.presence_penalty.to_f,
 46 |       frequency_penalty: self.frequency_penalty.to_f,
 47 |       max_tokens: self.max_tokens.to_i
 48 |     }
 49 | 
 50 |     request_headers = {
 51 |       "Content-Type" => "application/json",
 52 |       "Authorization" => "Bearer #{ENV['OPEN_AI_SECRET_KEY']}"
 53 |     }
 54 | 
 55 |     # avoid "You didn't provide an API key. You need to provide your API key in an Authorization header using Bearer auth (i.e. Authorization: Bearer YOUR_KEY)"
 56 |     if ENV['OPEN_AI_SECRET_KEY'].nil?
 57 |       puts "You need to set the OPEN_AI_SECRET_KEY environment variable in .env to your OpenAI API key."
 58 |       exit 1
 59 |     end
 60 | 
 61 |     response = HTTParty.post(
 62 |       open_ai_gtp3_url,
 63 |       :body => JSON.dump(open_ai_params),
 64 |       :headers => request_headers,
 65 |       timeout: 60
 66 |     )
 67 | 
 68 |     if response.code != 200
 69 |       puts "Error calling OpenAI GPT-3 API: #{response.code} #{response.body}".red.bold
 70 |     end
 71 | 
 72 |     hash = JSON.parse(response.body)
 73 | 
 74 |     # return the first result, stripped for convenience.
 75 |     hash["choices"][0]["text"].strip if hash["choices"].present? && hash["choices"][0].present?
 76 |   end
 77 | 
 78 |   # Given our templates may be parameterized with {{variables}}
 79 |   # this method will find them and replace them with the provided values.
 80 |   def replace_params(params:)
 81 |     ready_prompt = self.prompt
 82 |     params.each do |key, value|
 83 |       ready_prompt = ready_prompt.gsub("{{#{key}}}", value)
 84 |     end
 85 | 
 86 |     # error if any unreplaced {{variables}} remain.
 87 |     # search for {{ + text + }}
 88 |     if ready_prompt =~ /{{.*}}/
 89 |       which_remain = ready_prompt.scan(/{{.*}}/)
 90 |       raise "Error: required prompt variables missing: #{which_remain.join(', ')}"
 91 |     end
 92 | 
 93 |     return ready_prompt
 94 |   end
 95 | 
 96 | 
 97 |   # Parse a template file.
 98 |   # Todo: error handling, smart defaults, etc.
 99 |   def self.parse_param(line:, param_name:, ai_template:)
100 |     if line && line.start_with?("#{param_name}:")
101 |       val = line.split(":")[1].to_s.strip
102 |       # set the attribute on the ai_template object:
103 |       ai_template.send("#{param_name}=", val)
104 |     end
105 |   end
106 | 
107 |   # look for the template.txt in the ai_templates folder with the name token
108 |   # and load the object.
109 |   # The file format is:
110 |   #  -- one or more lines of names params corresponding to GPT-3 prompt parameters.
111 |   #  -- a blank line
112 |   #  -- the rest of the file is the prompt text with {{slug}} variables for dynamic values.
113 |   #
114 |   def self.load(token:)
115 |     if token.nil?
116 |       raise "No token provided"
117 |     end
118 |     if !File.exist?("ai_templates/#{token}.txt")
119 |       raise "No template found for token #{token}"
120 |     end
121 |     text = File.read("ai_templates/#{token}.txt")
122 | 
123 |     # parse the file looking lines starting with the known attribute names + ":" and store the values
124 |     lines = text.split("\n")
125 |     ai_template = AiTemplate.new
126 |     ai_template.token = token
127 |     lines.each do |line|
128 |       parse_param(line: line, param_name: "name", ai_template: ai_template)
129 |       parse_param(line: line, param_name: "token", ai_template: ai_template)
130 |       parse_param(line: line, param_name: "max_tokens", ai_template: ai_template)
131 |       parse_param(line: line, param_name: "description", ai_template: ai_template)
132 |       parse_param(line: line, param_name: "temperature", ai_template: ai_template)
133 |       parse_param(line: line, param_name: "engine", ai_template: ai_template)
134 |       parse_param(line: line, param_name: "n", ai_template: ai_template)
135 |       parse_param(line: line, param_name: "top_p", ai_template: ai_template)
136 |       parse_param(line: line, param_name: "frequency_penalty", ai_template: ai_template)
137 |       parse_param(line: line, param_name: "presence_penalty", ai_template: ai_template)
138 |       parse_param(line: line, param_name: "stop_strs", ai_template: ai_template)      
139 |     end
140 | 
141 |     # parse everything after the first blank line into prompt:
142 |     blank_line_index = lines.index("")
143 |     ai_template.prompt = lines[blank_line_index..lines.length].join("\n").strip
144 | 
145 |     return ai_template
146 |   end
147 | 
148 |   # Quick validation of a template file.
149 |   # This will help if people try making their own template files.
150 |   def validate!
151 |     raise "missing a name" if self.name.nil?
152 |     raise "missing a description" if self.description.nil?
153 |     raise "missing a temperature" if self.temperature.nil?
154 |     raise "missing an engine" if self.engine.nil?
155 |     raise "missing an n" if self.n.nil?
156 |     raise "missing a top_p" if self.top_p.nil?
157 |     raise "missing a frequency_penalty" if self.frequency_penalty.nil?
158 |     raise "missing a presence_penalty" if self.presence_penalty.nil?
159 |     raise "missing a stop_strs" if self.stop_strs.nil?
160 |   end
161 | 
162 | end


--------------------------------------------------------------------------------
/ai_templates/determine_best_link_url.txt:
--------------------------------------------------------------------------------
 1 | name: Determine Best Link URL
 2 | description: Enter a description of a link on the page and we'll find the url of the best link in the html for it.
 3 | temperature: 0.0
 4 | engine: code-davinci-002
 5 | n: 1
 6 | top_p: 1.0
 7 | frequency_penalty: 0.0
 8 | presence_penalty: 0.0
 9 | max_tokens: 400
10 | stop_strs: ```
11 | 
12 | An advanced AI performs automations over a browser session to figure out the best link to click given some raw HTML.
13 | The previous command executed was:
14 | {{history}}
15 | 
16 | Given the HTML below, find the URL of the link that is most likely to be the best page for "{{description}}" and is not a link to somewhere on google.com.
17 | The html:
18 | ```
19 | {{html}}
20 | ```
21 | Return the result as a valid JSON string like {anchor: "$(the anchor text)", url: "$(the url)"}
22 | Result:
23 | ```


--------------------------------------------------------------------------------
/ai_templates/instructions_to_commands.txt:
--------------------------------------------------------------------------------
 1 | name: Instructions to Commands
 2 | description: Interpret commands
 3 | temperature: 0.0
 4 | engine: code-davinci-002
 5 | n: 1
 6 | top_p: 1.0
 7 | frequency_penalty: 0.0
 8 | presence_penalty: 0.0
 9 | max_tokens: 350
10 | stop_strs: ```
11 | 
12 | # AI BROWSER CONTROLLER OVERVIEW
13 | 
14 | An artificially intelligent browser extension helps people surf the web.
15 | It works by taking a simple command and expanding it into actions that are performed by the web browser.
16 | The AI attempts to behave like a human would when using the web to research topics, by asking good questions.
17 | For example, given the human's original question, the AI might ask 2 more related questions.
18 | 
19 | # STANDARD COMMANDS
20 | 
21 | The standard commands that the AI outputs are as follows:
22 | 
23 | go: $(a url in standard format)
24 | click: $(the name of a link to click)
25 | question: $(an english language question or questions to answer in detail)
26 | 
27 | # EXAMPLES OF HUMAN INPUT TO COMMANDER OUTPUT
28 | 
29 | # input:
30 | ```
31 | start an order for filet mignon on crowd cow
32 | ```
33 | 
34 | # output:
35 | go: https://www.crowdcow.com
36 | search: Filet Mignon
37 | click: filet mignon
38 | click: add to cart
39 | 
40 | # input:
41 | ```
42 | go to google
43 | search for instant soup
44 | click on 'shopping'
45 | which products are keto-friendly?
46 | are there any sugar-free products?
47 | ```
48 | 
49 | # output:
50 | go: https://www.google.com
51 | search: instant soup
52 | click: shopping
53 | question: which products are keto-friendly?
54 | question: are there any sugar-free products? which ones?
55 | 
56 | 
57 | # input:
58 | ```
59 | what are some keto-friendly products that are highly recommended?
60 | ```
61 | 
62 | # output:
63 | go: https://www.google.com
64 | search: the best keto-friendly products
65 | click: highly recommended keto-friendly products
66 | question: which products are listed as highly recommended?
67 | question: why are these products listed as highly recommended?
68 | question: how much do these products cost?
69 | 
70 | # input:
71 | ```
72 | {{input}}
73 | ```
74 | 
75 | # output:
76 | ```


--------------------------------------------------------------------------------
/ai_templates/overall_best_answer.txt:
--------------------------------------------------------------------------------
 1 | name: Answer combiner
 2 | description: Enter a list of answers and we'll combine into one answer
 3 | temperature: 0.0
 4 | engine: text-davinci-002
 5 | n: 1
 6 | top_p: 1.0
 7 | frequency_penalty: 0.0
 8 | presence_penalty: 0.0
 9 | max_tokens: 500
10 | stop_strs: Question:
11 | 
12 | Given a question and some answers, combine the answers into one single answer.
13 | Question: {{question}}
14 | Answers: {{answers}}
15 | Combined answer:


--------------------------------------------------------------------------------
/ai_templates/page_command_question.txt:
--------------------------------------------------------------------------------
 1 | name: Website answer generator 
 2 | description: Enter a website and we'll write the answer to a question about it.
 3 | temperature: 0.0
 4 | engine: text-davinci-002
 5 | n: 1
 6 | top_p: 1.0
 7 | frequency_penalty: 0.0
 8 | presence_penalty: 0.0
 9 | max_tokens: 500
10 | stop_strs: Question:
11 | 
12 | Answer a question using the contents of the webpage below, and inferring meaning from element names and css classes and through proximity to other elements on the page. 
13 | Return a single JSON hash with keys: question, answer
14 | ````
15 | {{page_content}}
16 | ````
17 | Question: {{question}}
18 | Answer:


--------------------------------------------------------------------------------
/ai_templates/summarize_session.txt:
--------------------------------------------------------------------------------
 1 | name: Summarize Session
 2 | description: Given the raw command + results of an automated AI browser session, summarize for humans what happened.
 3 | temperature: 0.2
 4 | engine: text-davinci-001
 5 | n: 1
 6 | top_p: 1.0
 7 | frequency_penalty: 0.1
 8 | presence_penalty: 0.1
 9 | max_tokens: 300
10 | stop_strs:
11 | 
12 | An artificially intelligent browser extension helps people surf the web.
13 | It works by taking a simple command and expanding it into actions that are performed by the web browser.
14 | The AI attempts to behave like a human would when using the web to research topics, by asking good questions.
15 | 
16 | Original command: 
17 | {{human_entries}}
18 | 
19 | Expanded commands and results:
20 | {{history}}
21 | 
22 | State the result that the AI found, and explain in detail why the result is valid (or not) and explain in English how it arrived at this result.


--------------------------------------------------------------------------------
/commander.rb:
--------------------------------------------------------------------------------
  1 | # A class that handles interfacing with Puppeteer
  2 | # to control a browser session.
  3 | require_relative 'html_cleaner'
  4 | require 'nokogiri'
  5 | require 'rmagick'
  6 | require 'securerandom'
  7 | require 'similar_text'
  8 | require 'tempfile'
  9 | 
 10 | class Commander
 11 |   DEFAULT_USER_AGENT = "Mozilla/5.0 (iPhone; CPU iPhone OS 14_4 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.4 Mobile/15E148 Safari/604.1"
 12 | 
 13 |   def self.page_command_search(page, text)
 14 |     text = text.strip
 15 |     text = text.gsub(/[\s\u00A0]/, ' ')
 16 | 
 17 |     # Special case: youtube
 18 |     if page.url.include?("youtube.com")
 19 |       uri_escaped = URI.escape(text)
 20 |       page.wait_for_navigation do
 21 |         page.goto("https://www.youtube.com/results?search_query=#{uri_escaped}", wait_until: "networkidle0")
 22 |       end
 23 |       return
 24 |     end
 25 | 
 26 |     # type "joe" in the text field.
 27 |     # Look for the form
 28 |     form = page.query_selector("form")
 29 |     if form.nil?
 30 |       text_field = page.query_selector("input[type=text]") || page.query_selector("input[type=search]")
 31 |     else
 32 |       text_field = form.query_selector("input[type=text]") || form.query_selector("input[type=search]")
 33 |     end
 34 | 
 35 |     if text_field.nil?
 36 |       puts "No text field found".red
 37 |       return
 38 |     end
 39 | 
 40 |     text_field.scroll_into_view_if_needed
 41 | 
 42 |     # Set the value to nothing.
 43 |     text_field.evaluate("b => b.value = ''")
 44 | 
 45 |     # type backspace to clear it
 46 |     text_field.type_text("\b")
 47 | 
 48 |     # type the new search
 49 |     page.keyboard.type_text(text)
 50 | 
 51 |     # wait for it to load
 52 |     page.wait_for_function('() => document.querySelector("*") !== null', timeout: 0) do
 53 |     end
 54 | 
 55 |     # type enter and let it navigate.
 56 |     page.keyboard.press("Enter")
 57 | 
 58 |     page.wait_for_function('() => document.querySelector("*") !== null', timeout: 0) do
 59 |     end
 60 | 
 61 |     sleep(1)
 62 |   end
 63 | 
 64 |   def self.click_link(page, link)
 65 |     url = link.evaluate("e => e.href")
 66 |     link_anchor_text = link.evaluate("e => e.innerText")
 67 |     page.goto(url, wait_until: "load", timeout: 0)
 68 |     page.wait_for_function('() => document.querySelector("*") !== null', timeout: 0) do
 69 |     end
 70 |     sleep(2)
 71 |   end
 72 | 
 73 |   # Ask the AI for the best URL to click given some HTML and some text with meaning.
 74 |   # last_history_text stores some context that might be helpful to the AI.
 75 |   # return {anchor:, url:} hash
 76 |   def self.page_command_smart_click(page, text, last_history_text)
 77 |     # Ask the AI to find the best link on the page for the given intent.
 78 |     html = page.content
 79 |     link_hash = Commander.determine_best_link_url(html, text, last_history_text)
 80 |     if link_hash.present?
 81 |       # Find the link in the page.
 82 |       link = page.query_selector("a[href='#{link_hash["url"]}']")
 83 |       if link.present?
 84 |         Commander.click_link(page, link)
 85 |       else
 86 |         Commander.page_command_navigate_to(page, link_hash[:url])
 87 |       end
 88 |       return link_hash
 89 |     else
 90 |       puts "No link found for: #{text}".red
 91 |     end    
 92 |   end
 93 | 
 94 |   def self.page_command_navigate_to(page, url)
 95 |     page.wait_for_navigation do
 96 |       page.goto(url, wait_until: "networkidle0")
 97 |     end
 98 |   end
 99 | 
100 |   # Return the text of the best link match as a hash {anchor:, url:}
101 |   def self.determine_best_link_url(html, description, last_history_text)
102 |     clean_html = HtmlCleaner.clean_html(html)
103 |     ai_template = AiTemplate.load(token: "determine_best_link_url")
104 |     parts = HtmlCleaner.split_for_open_ai(clean_html, ai_template.prompt, description)
105 | 
106 |     # For now, just pulling as much HTML as I can actually feed to a prompt.
107 |     # Another approach could be to run each html chunk and then use more AI
108 |     # to determine the best one.
109 |     part = parts.first
110 |     params = {
111 |       "html" => part,
112 |       "description" => description,
113 |       "history" => last_history_text
114 |     }
115 |     json_string = ai_template.run(
116 |       params: params
117 |     )
118 |     return JSON.parse(json_string.strip)
119 |   end
120 | 
121 |   def self.overall_best_answer(question, answers)
122 |     AiTemplate.run!(
123 |       token: "overall_best_answer", 
124 |       params: {
125 |         "question" => question,
126 |         "answers" => answers.join("\n")
127 |       }
128 |     )
129 |   end
130 | 
131 |   def self.page_command_question(clean_html, question, last_history_text)
132 |     ai_template = AiTemplate.load(token: "page_command_question")
133 |     # Since we can't put too many things into the OPENAI we split up the page content.
134 |     #
135 |     # OpenAI: or most models this is 2048 tokens, or about 1500 words
136 |     # One token is ~4 characters of text for common English text
137 | 
138 |     parts = HtmlCleaner.split_for_open_ai(clean_html, ai_template.prompt, question)
139 | 
140 |     answers = []
141 |     # Total Hack: only do the first 2
142 |     parts.first(2).each do |part_page_content|
143 |       results = ai_template.run(
144 |         params: {
145 |           "page_content" => part_page_content,
146 |           "question" => question
147 |         }
148 |       )  
149 |       answers << results        
150 |     end
151 | 
152 |     # find overall best answer
153 |     Commander.overall_best_answer(question, answers)
154 |   end
155 | 
156 |   # Given a pupeteer's browser page, run the command.
157 |   # Might return a string result, depending on the command type.
158 |   def self.run_command_on_page(page:, command:, history:)
159 |     result = ""
160 |     raw_html = page.content
161 |     clean_html = HtmlCleaner.clean_html(raw_html, page_title: page.title, page_url: page.url)      
162 | 
163 |     # Each command is a word ending in ":" with arguments that follow.
164 |     if command.include?(":")
165 |       # The action is everything before the first ":"
166 |       action = command.split(":")[0].strip
167 | 
168 |       # The args is everything after the first ":"
169 |       index = command.index(":")
170 |       args = command[index+1..-1].strip
171 |     else
172 |       action = command.split(" ").first
173 |       # remove the first occurrence of action from the command
174 |       args = command.split(" ").drop(1).join(" ")
175 |     end
176 | 
177 |     if action.blank?
178 |       puts "command: [#{command}]".red.bold
179 |       raise ("Command lacks action: #{command}")
180 |     end
181 | 
182 |     action_downcase = action.downcase
183 | 
184 |     # Pull the last command from history and format it for the AI
185 |     history_formatted = []
186 |     last_history = history.try(:last)
187 |     if last_history.present?
188 |       history_formatted << "# last command: #{last_history[:command]}"
189 |       history_formatted << "# last result: #{last_history[:result]}"
190 |     end
191 |     last_history_text = history_formatted.join("\n")
192 |     
193 |     case action_downcase
194 |     when "go"
195 |       # args is a URL.
196 |       args = "https://#{args}" if !args.start_with?("http")
197 |       Commander.page_command_navigate_to(page, args)
198 |       result = "Opened #{args}"
199 |     when "click"
200 |       # args is a string to click
201 |       link_hash = Commander.page_command_smart_click(page, args, last_history_text)
202 |       if link_hash.present?
203 |         if args != link_hash['anchor']
204 |           info = "Clicked \"#{args}\" -> #{link_hash['anchor']}"
205 |         else
206 |           info = "Clicked \"#{args}\""
207 |         end
208 |         domain = URI.parse(link_hash['url']).host
209 |         if domain.present?
210 |           info += " (#{domain})"
211 |         end
212 |         result = info
213 |       else
214 |         info = "Could not click \"#{args}\""
215 |         result = info
216 |       end
217 |     when "search"
218 |       Commander.page_command_search(page, args)
219 |       info = "Searched for \"#{args}\""
220 |       result = info
221 |     when "question"
222 |       answer = Commander.page_command_question(clean_html, args, last_history_text)
223 |       result = "Q: #{args}, A: #{answer}"
224 |     else
225 |       # Unknown command
226 |       result = "Unknown command! action: #{action}, args: #{args}"
227 |     end
228 |     return result
229 |   end
230 | end


--------------------------------------------------------------------------------
/html_cleaner.rb:
--------------------------------------------------------------------------------
  1 | # This class contains utilities to simplify HTML for use in size-limited GPT-3 prompts
  2 | # without sacrificing the meaning of the HTML.
  3 | # Why? Puppeteer surfs and pulls the HTML from pages it visits but the HTML can be quite verbose.
  4 | class HtmlCleaner
  5 | 
  # Lookup tables for HtmlCleaner. All are frozen so that shared constants
  # cannot be modified by accident at runtime.

  # Basic structural element names.
  # NOTE(review): how this list is consumed is outside the visible part of the class.
  BASIC_ELEMENTS = %w[p br span div td].freeze

  # Names treated as "linkable".
  # NOTE(review): consumer not visible here -- confirm whether these are tag names, class names or both.
  LINKABLES = %w[a link button btn].freeze

  # Element names rewritten to a simpler name (see simpler_element_name).
  SIMPLER_ELEMENT_NAMES = {
    "a" => "link",
    "anchor" => "link",
  }.freeze

  # CSS class names rewritten to a simpler name (see simpler_class_name).
  CLASS_RENAMER = {
    "title" => "section",
    "btn" => "button",
  }.freeze

  # Some common classes that carry meaning (hence GPT-3 processing will care about) and should be preserved.
  CLASS_WHITELIST = %w[
    button btn link input strikethrough title
    rank priority star rating review score
    price cost menu user
    date time page age month day year
    type category kind
    offer promo sale cart add
    image email
    street city cities zip postal country
    reservation availability quantity inventory product sku
    notify share important comment article
    venue location color footer
    skip next previous
    cuisine neighborhood
  ].freeze
 87 | 
 88 |   def self.simpler_element_name(element_name)
 89 |     if SIMPLER_ELEMENT_NAMES.key?(element_name.downcase)
 90 |       return SIMPLER_ELEMENT_NAMES[element_name.downcase].downcase
 91 |     else
 92 |       return element_name.downcase
 93 |     end
 94 |   end
 95 | 
 96 |   def self.simpler_class_name(class_name)
 97 |     if CLASS_RENAMER.key?(class_name.downcase)
 98 |       return CLASS_RENAMER[class_name.downcase].downcase
 99 |     else
100 |       return class_name.downcase
101 |     end
102 |   end
103 | 
104 |   def self.clean_classes(node)
105 |     pclass = node['class']
106 |     keeper_classes = []
107 |     if pclass.present?
108 |       classes = pclass.to_s.split(" ")
109 |       
110 |       # are any of classes (downcased) contained in any of LINKABLES?
111 |       found = classes.select {|class_name| LINKABLES.include?(class_name.downcase)}
112 |       if found.blank?
113 |         # if none of these are LINKABLES, then go up the tree looking for them, and add if found.
114 |         go_up = node.parent
115 |         go_up_classes = go_up['class'].to_s.split(" ")
116 |         while go_up.present?
117 |           go_up_name = go_up.name.downcase
118 |           if LINKABLES.include?(go_up_name)
119 |             if go_up_name == "a"
120 |               go_up_name = "link"
121 |             end
122 |             keeper_classes = [go_up_name] + keeper_classes
123 |             break
124 |           end
125 |           found = go_up_classes.select {|class_name| LINKABLES.include?(class_name.downcase)}
126 |           if found.present?
127 |             found.each do |found_class|
128 |               if found_class == "a"
129 |                 found_class = "link"
130 |               end
131 |               keeper_classes = [found_class] + keeper_classes
132 |             end
133 |             break
134 |           end
135 |           if node.parent == go_up
136 |             break
137 |           end
138 |           go_up = node.parent
139 |         end
140 |       end
141 | 
142 |       # only keep classes if they contain (as a substring) any of the words in the CLASS_WHITELIST
143 |       # and only keep the whitelist verion of the class (which is simpler / more compact semantics)
144 |       search_classes = CLASS_WHITELIST.dup
145 |       classes.each do |class_name|
146 |         search_classes.each do |wl_class|
147 |           if class_name.downcase.include?(wl_class.downcase)
148 |             keeper_classes << simpler_class_name(wl_class.downcase)
149 |             search_classes.delete(wl_class)
150 |           end
151 |         end
152 |       end
153 |       keeper_classes.uniq!
154 |     end
155 | 
156 |     return keeper_classes
157 |   end
158 | 
159 |   def self.clean_html(raw_html, page_title: nil, page_url: nil)
160 |     html = raw_html.encode('UTF-8', invalid: :replace, undef: :replace, replace: '', universal_newline: false).gsub(/\P{ASCII}/, '')
161 |     parser = Nokogiri::HTML(html, nil, Encoding::UTF_8.to_s)
162 |     parser.xpath('//script')&.remove
163 |     parser.xpath('//style')&.remove
164 | 
165 |     # Build the new doc as we go.
166 |     nodes_processed = []
167 | 
168 |     # parse the HTML into nodes
169 |     # and build into a tree of depth=2 where parents have children
170 |     # and the parents are in order.
171 |     # First, get all the leaf nodes in order.
172 |     leaf_nodes = []
173 |     parser.xpath('//*[not(*)]').each do |node|
174 |       leaf_nodes << node
175 |     end
176 | 
177 |     # Next, go through getting parents (in order) and build a data structure
178 |     # that will store the 1:n relation of parent to child/leaf.
179 |     parent_hashses = []
180 |     leaf_nodes.each do |node|
181 |       parent = node.parent
182 |       if parent.present?
183 |         # Find that parent in the parent_hashses
184 |         parent_index = parent_hashses.index { |h| h[:parent] == parent }
185 |         if parent_index.present?
186 |           # add this child to the parent array
187 |           parent_hashses[parent_index][:children] << node
188 |         else
189 |           # create a new parent hash
190 |           parent_hashses << { parent: parent, children: [node] }
191 |         end
192 |       end
193 |     end
194 | 
195 |     # Finally, go through and BUILD HTML:
196 |     build_html = []
197 |     parent_hashses.each do |parent_hash|
198 |       parent = parent_hash[:parent]
199 |       children = parent_hash[:children]
200 |       formatted = format_parent_and_chilren(parent, children)
201 |       if formatted.present?
202 |         build_html << formatted
203 |       end
204 |     end
205 | 
206 |     # Add a metatag at the top of the URL
207 |     if page_title.present? && page_url.present?
208 |       build_html.unshift("<meta name='og:title' content='#{page_title}' />")
209 |       build_html.unshift("<meta name='og:url' content='#{page_url}' />")
210 |     end
211 | 
212 |     # Print a few lines of the HTML for debugging purposes:
213 |     debug = false
214 |     if debug
215 |       puts " - - - - - - - - - ".white.bold
216 |       build_html.first(50).each do |line|
217 |         puts "     " + line.white
218 |       end
219 |       puts " - - - - - - - - - ".white.bold
220 |       puts ""
221 |     end
222 | 
223 |     # Return a complete list of all classes in the original HTML
224 |     #  and the new HTML.
225 |     original_classes = []
226 |     parser.xpath('//*').each do |node|
227 |       if node.attributes["class"].present?
228 |         class_string = node.attributes["class"].value
229 |         class_string.split(" ").each do |c|
230 |           original_classes << c
231 |         end
232 |       end
233 |     end
234 | 
235 |     build_html.join("\n")
236 |   end
237 | 
238 |   def self.format_parent_and_chilren(parent, children)
239 |     node_html = ""
240 |     if children.count > 1
241 |       keeper_classes = clean_classes(parent)
242 |       # Remove any classes that are equal to the element name.
243 |       #  <link class='link'>Top Rated</link> --> <link>Top Rated</link>
244 |       keeper_classes = keeper_classes.reject { |c| c.downcase == parent.name.downcase }
245 | 
246 |       parent_name = simpler_element_name(parent.name)
247 |       needs_parent = true
248 | 
249 |       if keeper_classes.blank?
250 |         if parent_name == "p" || parent_name == "br" || parent_name == "div" || parent_name == "span"
251 |           needs_parent = false
252 |         else
253 |           node_html << "<#{parent_name}>"
254 |         end
255 |       else
256 |         node_html << "<#{parent_name} class='#{keeper_classes.join(' ')}'>"
257 |       end
258 | 
259 |       children_html = ""
260 |       children.each do |child|
261 |         child_html = format_child_node(child)
262 |         if child_html.present?
263 |           child_html = "\n  " + child_html
264 |           children_html << child_html
265 |         end
266 |       end
267 |       if children_html.blank?
268 |         return ""
269 |       else
270 |         node_html << " #{children_html}"
271 |       end
272 |       if needs_parent
273 |         node_html << "\n</#{parent_name}>"
274 |       end
275 |     else
276 |       child_html = format_child_node(children.first)
277 |       if child_html.present?
278 |         node_html << child_html
279 |       end
280 |     end
281 |     node_html
282 |   end
283 | 
284 |   # Take a single parsed HTML node and reformat to something simpler.
285 |   def self.format_child_node(node)
286 |     element = simpler_element_name(node.name).downcase
287 |     # get the immedate text in the node (not children)
288 |     text = node.content.strip
289 | 
290 |     # if the text parent has signifiant classes, put them in as they may be hints as to the meaning.
291 |     keeper_classes = clean_classes(node)
292 | 
293 |     # Is this needed?
294 |     if keeper_classes.present? && element.blank?
295 |       element = "p"
296 |     end
297 | 
298 |     # If the element is p, br, div or span, "elevate" the first class name to be the element name.
299 |     # Examples:
300 |     #   <p class='score'>228 points</p> --> <score>228 points</score>
301 |     #   <div class='button time'>6:00PM</div> --> <button class='time'>6:00PM</button>
302 | 
303 |     if keeper_classes.present?
304 |       if BASIC_ELEMENTS.include?(element)
305 |         element = keeper_classes[0]
306 |         keeper_classes.shift # removes the first element
307 |       end
308 |     end
309 | 
310 |     # Remove any classes that are equal to the element name.
311 |     #  <link class='link'>Top Rated</link> --> <link>Top Rated</link>
312 |     keeper_classes = keeper_classes.reject { |c| c.downcase == element.downcase }
313 | 
314 |     text = text.strip
315 |     # if the text only contains ascii chars 32 and 160, then it's just whitespace.
316 |     if text.gsub(/[\s\u00A0]/, '').empty?
317 |       text = ""
318 |     end
319 | 
320 |     formatted = ""
321 | 
322 |     if text.blank?
323 |       return ""
324 |     end
325 | 
326 |     # If the element is H1, H2, H3, H4, H5, H6, then we don't need the class "section"
327 |     hs = ["h1", "h2", "h3", "h4", "h5", "h6"]
328 |     if hs.include?(element)
329 |       keeper_classes.delete("section")
330 |     end
331 | 
332 |     # If the element has no classes and is span | p | br | div | td, then just return the text.
333 |     if keeper_classes.blank? && BASIC_ELEMENTS.include?(element)
334 |       return text
335 |     end
336 | 
337 |     # If the element has no classes and the parent has multiple children, then just return the text.
338 |     #if keeper_classes.blank? && node.parent.children.count > 1
339 |     #  return text
340 |     #end
341 | 
342 |     if element.present?
343 |       keeper_classes_str = keeper_classes.join(' ')
344 |       href = ""
345 |       # if the node has href, add it back in to formatted.
346 |       if node.attributes["href"].present?
347 |         href = node.attributes["href"].value
348 |         href = " href='#{href}'"
349 |       end
350 |       
351 |       if keeper_classes_str.present?
352 |         formatted = "<#{element} class='#{keeper_classes_str}'#{href}>#{text}</#{element}>"
353 |       else
354 |         formatted = "<#{element}#{href}>#{text}</#{element}>"
355 |       end
356 |     else
357 |       formatted = text
358 |     end
359 |   
360 |     return formatted
361 |   end
362 | 
363 | 
364 |   # if HTML is too large to put into the parameter in a prompt,
365 |   # we split it so we can run the prompt multiple times and post-process
366 |   # the results.
367 |   def self.split_for_open_ai(clean_html, prompt, overhead)
368 |     open_ai_max_tokens = 2048
369 |     open_ai_max_chars = (open_ai_max_tokens * 4).to_i
370 |     safe_buffer = 100
371 |     max_chars = open_ai_max_chars - overhead.length - prompt.length - safe_buffer
372 |     split_html(clean_html, max: max_chars)
373 |   end
374 | 
375 |   # Split the HTML into several arrays where the length of each string
376 |   # is less than max, and do not split inside of tags.
377 |   # OpenAI: or most models this is 2048 tokens, or about 1500 words
378 |   # One token is ~4 characters of text for common English text
379 |   def self.split_html(clean_html, max: 3900)
380 | 
381 |     if clean_html.length <= max
382 |       return [clean_html]
383 |     end
384 | 
385 |     # use regex to split by closing tags
386 |     # keep the closing tag in results.
387 |     # instead of clean_html.split(/<\/[^>]+>/), use: ?= operator
388 |     groups = clean_html.split(/(?=<\/[^>]+>)/)
389 | 
390 |     ar_parts = []
391 |     cur_part_len = 0
392 |     next_chunk = []
393 |     groups.each do |bits, i|
394 |       cur_part_len = cur_part_len + bits.length
395 |       if cur_part_len < max
396 |         next_chunk << bits
397 |       else
398 |         ar_parts << next_chunk
399 |         next_chunk = []
400 |         next_chunk << bits
401 |         cur_part_len = bits.length
402 |       end
403 |     end
404 | 
405 |     if !ar_parts.include?(next_chunk)
406 |       ar_parts << next_chunk
407 |     end
408 | 
409 |     # Join all the parts
410 |     parts = []
411 |     ar_parts.each do |sub_array|
412 |       parts << sub_array.join(" ")
413 |     end  
414 | 
415 |     # Pull out the meta name='og:url' and meta name='og:title' tags.
416 |     parsed = Nokogiri::HTML(clean_html)
417 |     og_url = parsed.css("meta[name='og:url']").first.try(:attr, "content")
418 |     og_title = parsed.css("meta[name='og:title']").first.try(:attr, "content")
419 | 
420 |     # Add a meta page "page 1 of 2" to each part.
421 |     # And the og:url and og:title to pages 2+
422 |     total_parts = parts.length
423 |     parts = parts.map.with_index do |part, i|
424 |       if i >= 1
425 |         part = "<meta name=\"og:url\" content=\"#{og_url}\">\n" + part
426 |         part = "<meta name=\"og:title\" content=\"#{og_title}\">\n" + part
427 |       end
428 |       part = "<meta name=\"page\" content=\"#{i+1} of #{total_parts}\">\n" + part 
429 |     end
430 | 
431 |     parts
432 |   end
433 | end


--------------------------------------------------------------------------------
/run.rb:
--------------------------------------------------------------------------------
  1 | # A Ruby script that runs a simple terminal-driven UI for the Commander app
  2 | # (AI-driven browser control).
  3 | 
  4 | require_relative "ai_template"
  5 | require_relative "commander"
  6 | require "active_support/all"
  7 | require "awesome_print"
  8 | require "colorize"
  9 | require "dotenv"
 10 | require "httparty"
 11 | require "puppeteer-ruby"
 12 | require "tty-box"
 13 | require "tty-font"
 14 | require "tty-screen"
 15 | 
 16 | Dotenv.load
 17 | 
 18 | # Load all the templates in the ai_templates folder
 19 | # and validate that their formatting looks good.
 20 | def validate_templates!
 21 |   # Templates are stored in the ai_templates folder as {token}.txt files.
 22 |   # Iterate through files in that directory:
 23 |   errors = []
 24 |   Dir.foreach("ai_templates") do |filename|
 25 |     next if filename == "." || filename == ".."
 26 |     next if !filename.end_with?(".txt")
 27 |     # load the template and validate it:
 28 |     token = filename.split(".")[0]
 29 |     begin
 30 |       ai_template = AiTemplate.load(token: token)
 31 |       ai_template.validate!
 32 |     rescue => e
 33 |       errors << "Template #{token}.txt: #{e.message}"
 34 |     end
 35 |   end
 36 |   if errors.any?
 37 |     raise errors.join("\n")
 38 |   end
 39 | end
 40 | 
 41 | # Raises a RuntimeError when the .env file is absent or when a required environment
 42 | # variable is unset or blank (e.g. a `KEY=` line left unfilled in .env).
 43 | def validate_env!
 44 |   required_env_vars = %w(OPEN_AI_SECRET_KEY)
 45 | 
 46 |   # if the .env file is missing, tell them so.
 47 |   unless File.exist?(".env")
 48 |     raise "Error: missing .env file. Please create a .env file and fill in the values for #{required_env_vars.join(', ')}."
 49 |   end
 50 | 
 51 |   # A blank value is as unusable as a missing one, so both are reported.
 52 |   missings = required_env_vars.select { |var| ENV[var].to_s.strip.empty? }
 53 |   return if missings.empty?
 54 | 
 55 |   raise "Error: missing required environment variables: #{missings.join(', ')}"
 56 | end
 61 | 
 62 | def print_instructions
 63 |   lines = []
 64 |   font = TTY::Font.new(:standard)
 65 |   title = font.write("Commander")
 66 |   # Colorize each line
 67 |   big_title = title.split("\n").map { |line| line.green.bold }.join("\n")
 68 | 
 69 |   lines << big_title
 70 |   lines << "This is a simple terminal driven UI for the Commander app."
 71 |   lines << "Commander is an AI driven browser control app."
 72 |   lines << "It uses OpenAI's GPT-3 API to generate commands for the browser."
 73 |   lines << "You can use it to automate tasks in your browser."
 74 |   lines << ""
 75 |   lines << "Instructions: Enter something.".white.bold
 76 |   puts TTY::Box.frame(lines.join("\n"), padding: 2, width: TTY::Screen.width)
 77 | end
 78 | 
 79 | def print_summary_of_session(human_entries:, history:)
 80 |   history_formatted_lines = []
 81 |   history.each_with_index do |h, i|
 82 |     history_formatted_lines << "Command #{i+1}: #{h[:command]}"
 83 |     history_formatted_lines << "Result #{i+1}: #{h[:result]}"
 84 |     history_formatted_lines << ""
 85 |   end
 86 |   history_formatted = history_formatted_lines.join("\n")
 87 | 
 88 |   template = AiTemplate.load(token: "summarize_session")
 89 |   params = {
 90 |     human_entries: human_entries.join("\n"),
 91 |     history: history_formatted
 92 |   }
 93 | 
 94 |   res = template.run(
 95 |     params: params
 96 |   )
 97 |   puts "=============================".red.bold
 98 |   puts template.replace_params(params: params)
 99 | 
100 |   summary_lines = []
101 |   summary_lines << "Original goal:".white.bold
102 |   summary_lines << human_entries.join("\n").yellow
103 |   summary_lines << ""
104 |   summary_lines << "Summary:".white.bold
105 |   summary_lines << res.green.bold
106 |   summary_lines << ""
107 |   summary_lines << "Raw AI steps taken:".white.bold
108 |   summary_lines << history_formatted.yellow
109 |   summary_lines << "" # needed because of a bug in TTY::Box?
110 |   
111 | 
112 |   # Big title font
113 |   font = TTY::Font.new(:standard)
114 |   title = font.write("Summary of Session")
115 |   # Colorize each line
116 |   big_title = title.split("\n").map { |line| line.white.bold }.join("\n")
117 | 
118 |   puts TTY::Box.frame(big_title, summary_lines.join("\n"), padding: 1, width: TTY::Screen.width)
119 | end
120 | 
121 | def ask(terminal_prompt, guide)
122 |   print terminal_prompt.white.bold + " <#{guide}> ".white
123 |   gets.chomp
124 | end
125 | 
126 | begin
127 |   validate_env!
128 |   validate_templates!
129 | rescue => e
130 |   puts TTY::Box.frame("Errors:", e, padding: 1, width: TTY::Screen.width)
131 |   exit 1
132 | end
133 | 
134 | print_instructions
135 | 
136 | # remember everything humans enter as we go.
137 | human_entries = []
138 | cmd = ask("What do you want to do?", "enter a goal")
139 | human_entries << cmd if cmd.present?
140 | 
141 | # The puppeteer session. Note that headless: false means that the browser will be visible.
142 | Puppeteer.launch(headless: false) do |browser|
143 |   page = browser.new_page
144 |   page.viewport = Puppeteer::Viewport.new(width: 800, height: 1200, device_scale_factor: 1.0)
145 |   page.set_user_agent(Commander::DEFAULT_USER_AGENT)
146 | 
147 |   if cmd.blank?
148 |     puts "using default command".white.bold
149 |     cmd = "what is the best omakase sushi experience in NYC?"
150 |     human_entries << cmd
151 |   end
152 | 
153 |   # start a loop to process commands and then ask for more commands
154 |   history = []
155 | 
156 | # The AI instructions_to_commands translates each line of our human instructions into a 
157 |   # simple, machine-readable commands that our engine can understand.
158 |   res = AiTemplate.run!(token: "instructions_to_commands", params: {input: cmd})
159 |   cmd_list = res.split("\n")
160 | 
161 |   lines = []
162 |   lines << "GIVEN:".white.bold
163 |   lines << cmd.yellow
164 |   lines << "PERFORM:".white.bold
165 |   lines << res.yellow
166 |   puts TTY::Box.frame(lines.join("\n"), e, padding: 1, width: TTY::Screen.width)
167 | 
168 |   # Here's where we run the command.
169 |   # Sometimes it's helpful to give GPT-3 prompts some context such as the last command and results
170 |   # In case that helps the AI figure out what to do for the current command. Thus, we pipe those in. 
171 |   cmd_list.each do |command|
172 |     result = Commander.run_command_on_page(page: page, command: command, history: history)
173 |     lines  = []
174 |     lines << "COMMAND: ".yellow.bold + " " + command.yellow
175 |     lines << "RESULT: ".white.bold + " " + result.white
176 |     history << {
177 |       command: command,
178 |       result: result
179 |     }
180 |     puts TTY::Box.frame(lines.join("\n"), e, padding: 1, width: TTY::Screen.width)
181 |   end
182 | 
183 |   print_summary_of_session(human_entries: human_entries, history: history)
184 | 
185 | end
186 | 


--------------------------------------------------------------------------------