├── demo ├── tree-data.edn ├── play-writer-data.edn ├── questions.edn ├── qna-with-eval.edn ├── play-writer-prompt.edn └── tree-prompt.edn ├── .dir-locals.el ├── resources ├── prompt-palette │ ├── text-analysis │ │ ├── extraction.edn │ │ ├── classification.edn │ │ └── instruction.edn │ ├── generation │ │ └── summarization.edn │ └── problem-solvers.edn ├── mcp-example │ └── echo.py └── env.edn ├── notebook ├── assets │ ├── cod.png │ ├── cove.png │ ├── gpt_ner.png │ ├── mitproxy.png │ ├── ner2024.png │ ├── prompt_ner.png │ ├── cov_contexts.png │ ├── ner_example.png │ └── gpt_ner_validation.png ├── examples │ ├── rag.clj │ ├── tree_prompts.clj │ ├── function_nodes.clj │ └── math_generate_code.clj ├── lc_presentation.clj ├── document_loading.clj ├── helpers.clj ├── index.clj ├── observability.clj ├── papers │ ├── chain_of_density.clj │ └── llms_as_optimizers.clj └── text_splitting.clj ├── secrets.edn.sample ├── src └── bosquet │ ├── mcp │ ├── tools.clj │ ├── transport.clj │ ├── client.clj │ ├── stdio_transport.clj │ └── core.clj │ ├── db │ ├── vector_db.clj │ ├── cache.clj │ └── qdrant.clj │ ├── memory │ ├── encoding.clj │ ├── simple_memory.clj │ ├── long_term_memory.clj │ ├── memory.clj │ └── retrieval.clj │ ├── tool │ ├── weather.clj │ └── math.clj │ ├── nlp │ ├── similarity.clj │ └── splitter.clj │ ├── llm │ ├── gen_data.clj │ ├── wkk.clj │ ├── claude.clj │ ├── schema.clj │ ├── localai.clj │ ├── http.clj │ ├── embeddings.clj │ ├── openai.clj │ ├── cohere.clj │ ├── tools.clj │ ├── ollama.clj │ ├── oai_shaped_llm.clj │ └── openai_tokens.clj │ ├── agent │ ├── tool.clj │ ├── agent_mind_reader.clj │ ├── wikipedia.clj │ └── react.clj │ ├── wkk.clj │ ├── converter.clj │ ├── template │ ├── read.clj │ └── selmer.clj │ ├── read │ └── document.clj │ ├── utils.clj │ ├── eval │ ├── evaluator.clj │ └── qna_generator.clj │ ├── env.clj │ └── cli.clj ├── config.edn.sample ├── docs └── _data │ ├── 8VtTJZmeZFgi5jQQrhbKmgfhWtTVRsncgVAVntmENdmkxdVzvCBCAgeL89V5ijjd16SAeqDbwFqoEjvCtHAGquwA7x.png │ ├── 8VvgKCu9ykPVamyZatEXZAczLZVvnP4qYyrNoZ5aGV4eaKZVTKAJeysnjBAYnrErmUw9gXDB6D2jHbaiN95ZtMJh7H.png │ ├── 8VxPcwBKTpJYGhUh1iu8eMpEzUxMAR5rPrBwgSCiDih5a6h4TvWnpJjbVqMWgazBs8CPhnD8wXTktzUsEho2BmGQR2.png │ └── 8VxQdUsnZEm8d6XhXUMKQDWiG4gVKydAesgewDPzecv5hvsdjBsgiWnaCS7xAzigSCzhkxjfaqvodofXbRTXUUdWGG.png ├── test └── bosquet │ ├── llm │ ├── schema_test.clj │ ├── gen_data_test.clj │ ├── cohere_test.clj │ ├── openai_tokens_test.clj │ ├── oai_shaped_llm_test.clj │ ├── tools_test.clj │ └── generator_test.clj │ ├── utils_test.clj │ ├── agent │ ├── wikipedia_test.clj │ └── agent_mind_reader_test.clj │ ├── env_test.clj │ ├── memory │ ├── memory_test.clj │ ├── retrieval_test.clj │ └── simple_memory_test.clj │ ├── template │ ├── selmer_test.clj │ └── read_test.clj │ ├── converter_test.clj │ └── nlp │ └── splitter_test.clj ├── tests.edn ├── .gitignore ├── .github ├── FUNDING.yml └── CONTRIBUTING.md ├── dev └── user.clj ├── bb.edn └── deps.edn /demo/tree-data.edn: -------------------------------------------------------------------------------- 1 | {:a 5 :b 2 :c 1} 2 | -------------------------------------------------------------------------------- /.dir-locals.el: -------------------------------------------------------------------------------- 1 | ((nil 2 | (cider-clojure-cli-aliases . 
":dev:test"))) 3 | -------------------------------------------------------------------------------- /demo/play-writer-data.edn: -------------------------------------------------------------------------------- 1 | {:title "The immanent collapse" :genre "doom"} 2 | -------------------------------------------------------------------------------- /resources/prompt-palette/text-analysis/extraction.edn: -------------------------------------------------------------------------------- 1 | {:extraction/entitites ""} 2 | -------------------------------------------------------------------------------- /notebook/assets/cod.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zmedelis/bosquet/HEAD/notebook/assets/cod.png -------------------------------------------------------------------------------- /notebook/assets/cove.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zmedelis/bosquet/HEAD/notebook/assets/cove.png -------------------------------------------------------------------------------- /notebook/assets/gpt_ner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zmedelis/bosquet/HEAD/notebook/assets/gpt_ner.png -------------------------------------------------------------------------------- /notebook/assets/mitproxy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zmedelis/bosquet/HEAD/notebook/assets/mitproxy.png -------------------------------------------------------------------------------- /notebook/assets/ner2024.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zmedelis/bosquet/HEAD/notebook/assets/ner2024.png -------------------------------------------------------------------------------- /notebook/assets/prompt_ner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zmedelis/bosquet/HEAD/notebook/assets/prompt_ner.png -------------------------------------------------------------------------------- /notebook/assets/cov_contexts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zmedelis/bosquet/HEAD/notebook/assets/cov_contexts.png -------------------------------------------------------------------------------- /notebook/assets/ner_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zmedelis/bosquet/HEAD/notebook/assets/ner_example.png -------------------------------------------------------------------------------- /notebook/assets/gpt_ner_validation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zmedelis/bosquet/HEAD/notebook/assets/gpt_ner_validation.png -------------------------------------------------------------------------------- /secrets.edn.sample: -------------------------------------------------------------------------------- 1 | {:openai {:api-key "....."} 2 | :cohere {:api-key "....."} 3 | :mistral {:api-key "....."} 4 | :claude {:api-key "....."}} 5 | -------------------------------------------------------------------------------- /src/bosquet/mcp/tools.clj: -------------------------------------------------------------------------------- 1 | (ns 
bosquet.mcp.tools 2 | "Dynamically populated namespace for MCP tools. 3 | Tools are added here when MCP servers are initialized.") 4 | -------------------------------------------------------------------------------- /src/bosquet/db/vector_db.clj: -------------------------------------------------------------------------------- 1 | (ns bosquet.db.vector-db) 2 | 3 | (defprotocol VectorDB 4 | (create [_this]) 5 | (delete [_this]) 6 | (add [_this _docs]) 7 | (search [_this _query _search-opts])) 8 | -------------------------------------------------------------------------------- /config.edn.sample: -------------------------------------------------------------------------------- 1 | {:default-llm {:temperature 0 :model :mistral-small} 2 | :mcp-servers [ 3 | {:name "echo-server" 4 | :type :stdio 5 | :command "python3" 6 | :args ["resources/mcp-example/echo.py"]}]} 7 | -------------------------------------------------------------------------------- /docs/_data/8VtTJZmeZFgi5jQQrhbKmgfhWtTVRsncgVAVntmENdmkxdVzvCBCAgeL89V5ijjd16SAeqDbwFqoEjvCtHAGquwA7x.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zmedelis/bosquet/HEAD/docs/_data/8VtTJZmeZFgi5jQQrhbKmgfhWtTVRsncgVAVntmENdmkxdVzvCBCAgeL89V5ijjd16SAeqDbwFqoEjvCtHAGquwA7x.png -------------------------------------------------------------------------------- /docs/_data/8VvgKCu9ykPVamyZatEXZAczLZVvnP4qYyrNoZ5aGV4eaKZVTKAJeysnjBAYnrErmUw9gXDB6D2jHbaiN95ZtMJh7H.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zmedelis/bosquet/HEAD/docs/_data/8VvgKCu9ykPVamyZatEXZAczLZVvnP4qYyrNoZ5aGV4eaKZVTKAJeysnjBAYnrErmUw9gXDB6D2jHbaiN95ZtMJh7H.png -------------------------------------------------------------------------------- /docs/_data/8VxPcwBKTpJYGhUh1iu8eMpEzUxMAR5rPrBwgSCiDih5a6h4TvWnpJjbVqMWgazBs8CPhnD8wXTktzUsEho2BmGQR2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zmedelis/bosquet/HEAD/docs/_data/8VxPcwBKTpJYGhUh1iu8eMpEzUxMAR5rPrBwgSCiDih5a6h4TvWnpJjbVqMWgazBs8CPhnD8wXTktzUsEho2BmGQR2.png -------------------------------------------------------------------------------- /docs/_data/8VxQdUsnZEm8d6XhXUMKQDWiG4gVKydAesgewDPzecv5hvsdjBsgiWnaCS7xAzigSCzhkxjfaqvodofXbRTXUUdWGG.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zmedelis/bosquet/HEAD/docs/_data/8VxQdUsnZEm8d6XhXUMKQDWiG4gVKydAesgewDPzecv5hvsdjBsgiWnaCS7xAzigSCzhkxjfaqvodofXbRTXUUdWGG.png -------------------------------------------------------------------------------- /resources/prompt-palette/text-analysis/classification.edn: -------------------------------------------------------------------------------- 1 | {:classification/classify 2 | "{{instruction/extract}} 3 | {{instruction/batch}} 4 | 5 | {{text-type|default:text}}: 6 | {{instruction/batch-items}} 7 | 8 | {{fact}}: 9 | {% llm-generate model=text-davinci-003 var-name=completion %}"} 10 | -------------------------------------------------------------------------------- /resources/prompt-palette/generation/summarization.edn: -------------------------------------------------------------------------------- 1 | {:text-analyzer/summarize-to-sentence 2 | "Provide a summary of the following {{text-type|default:text}} in a single sentence: 3 | 4 | Text: 5 | {{text}} 6 | 7 | Summary: 8 | {% llm-generate max-tokens=60 var-name=completion %}" 9 | 10 | 11 | } 12 | 
-------------------------------------------------------------------------------- /src/bosquet/memory/encoding.clj: -------------------------------------------------------------------------------- 1 | (ns bosquet.memory.encoding) 2 | 3 | (defprotocol Encoder 4 | (encode [_this _text])) 5 | 6 | ;; Memory encoder that changes nothing. Observations are stored as is. 7 | (deftype AsIsEncoder 8 | [] Encoder 9 | (encode 10 | [_this text] text)) 11 | -------------------------------------------------------------------------------- /test/bosquet/llm/schema_test.clj: -------------------------------------------------------------------------------- 1 | (ns bosquet.llm.schema-test 2 | (:require 3 | [clojure.test :refer [deftest is]] 4 | [bosquet.llm.schema :refer [model-mapping]])) 5 | 6 | (deftest test-model-mapping 7 | (is (= :y (model-mapping {:model-name-mapping {:x :y}} :x))) 8 | (is (= :z (model-mapping {:model-name-mapping {:x :y}} :z)))) 9 | -------------------------------------------------------------------------------- /test/bosquet/utils_test.clj: -------------------------------------------------------------------------------- 1 | (ns bosquet.utils-test 2 | (:require [bosquet.utils :as u] 3 | [clojure.test :refer [deftest is]])) 4 | 5 | (deftest snake-case-conversions 6 | (is (= {:fox_box 1 :box_fox 2 :box 3} 7 | (u/snake-case {:fox-box 1 :boxFox 2 :BOX 3}))) 8 | (is (= {:fox_box {:bird_grid 1}} 9 | (u/snake-case {:fox-box {:birdGrid 1}})))) 10 | -------------------------------------------------------------------------------- /tests.edn: -------------------------------------------------------------------------------- 1 | {:kaocha/watch? true 2 | :kaocha/color? true 3 | :kaocha/reporter [kaocha.report/dots] 4 | :kaocha/tests [{:kaocha/source-paths ["src"] 5 | :kaocha/test-paths ["test"] 6 | :kaocha/ns-patterns ["-test$"] 7 | :kaocha.testable/id :unit 8 | :kaocha.testable/type :kaocha.type/clojure.test}]} 9 | -------------------------------------------------------------------------------- /src/bosquet/mcp/transport.clj: -------------------------------------------------------------------------------- 1 | (ns bosquet.mcp.transport) 2 | 3 | (defprotocol MCPTransport 4 | "Protocol for MCP transport implementations" 5 | (send-request [this method params] "Send a request and return response") 6 | (send-notification [this method params] "Send a notification (no response expected)") 7 | (close [this] "Close the transport connection")) 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /demo/questions.edn: -------------------------------------------------------------------------------- 1 | [{:question "Who was the primary architect behind the construction of the Great Wall of China?"} 2 | {:question "What real-life technological advancements in the 19th century inspired Verne's portrayal of the Nautilus?"} 3 | {:question "What are the key scientific principles behind the development of CRISPR gene-editing technology?"} 4 | {:question "What instruments and technology were onboard Voyager 1?"}] 5 | -------------------------------------------------------------------------------- /test/bosquet/agent/wikipedia_test.clj: -------------------------------------------------------------------------------- 1 | (ns bosquet.agent.wikipedia-test 2 | (:require 3 | [bosquet.agent.wikipedia :as w] 4 | [clojure.test :refer [deftest is]])) 5 | 6 | (def ^:private fox-result 7 | ["Fox" "Fox News" "Fox Broadcasting Company"]) 8 | 9 | (deftest best-match-test 10 | (is 
(= "Fox" (w/best-match "Fox" fox-result))) 11 | (is (= "Fox" (w/best-match "Box" fox-result))) 12 | (is (nil? (w/best-match "Box" [])))) 13 | -------------------------------------------------------------------------------- /demo/qna-with-eval.edn: -------------------------------------------------------------------------------- 1 | {:question-answer ["Question: {{question}}" 2 | "Answer: {{answer}}"] 3 | :answer #:llm{:service :openai :model-params {:model :gpt-4 :max-tokens 240}} 4 | :eval ["{{question-answer}}" 5 | "" 6 | "Is this a correct answer?" 7 | "{{assessment}}"] 8 | :assessment #:llm{:service :cohere :model-params {:model :command :max-tokens 80}}} 9 | -------------------------------------------------------------------------------- /test/bosquet/env_test.clj: -------------------------------------------------------------------------------- 1 | (ns bosquet.env-test 2 | (:require 3 | [bosquet.env :as env] 4 | [clojure.string :as str] 5 | [clojure.test :as t])) 6 | 7 | (t/deftest config-location 8 | (with-redefs [env/exists? (fn [_d] true)] 9 | (t/is (str/starts-with? 10 | (.getPath (env/bosquet-cfg-file "config.edn")) 11 | "./"))) 12 | (with-redefs [env/exists? (fn [_d] false)] 13 | (t/is (= "./config.edn" (.getPath (env/bosquet-cfg-file "config.edn")))))) 14 | -------------------------------------------------------------------------------- /resources/prompt-palette/text-analysis/instruction.edn: -------------------------------------------------------------------------------- 1 | {:instruction/extract 2 | "Identify {{fact}} as: {{classes|join: , }} for the provided {{text-type|default:text}}." 3 | 4 | :instruction/answer-yes-no 5 | "Answer only with 'yes' or 'no'." 6 | 7 | :instruction/batch 8 | "Provide answers in the same numbered list format and order as the {{text-type|default:text}} list bellow." 9 | 10 | :instruction/batch-items 11 | "{% for t in texts %} 12 | {{forloop.counter}}. 
{{t}}{% endfor %}"} 13 | -------------------------------------------------------------------------------- /test/bosquet/memory/memory_test.clj: -------------------------------------------------------------------------------- 1 | (ns bosquet.memory.memory-test 2 | (:require 3 | [bosquet.memory.memory :as m] 4 | [bosquet.memory.retrieval :as r] 5 | [bosquet.wkk :as wkk] 6 | [clojure.test :refer [deftest is]])) 7 | 8 | (deftest available-memories-test 9 | (is (= [:message1 :message2] 10 | ;; memory is not configured, return existing messages as is 11 | (m/available-memories {wkk/recall-function r/recall-sequential} 12 | [:message1 :message2])))) 13 | -------------------------------------------------------------------------------- /test/bosquet/template/selmer_test.clj: -------------------------------------------------------------------------------- 1 | (ns bosquet.template.selmer-test 2 | (:require 3 | [clojure.test :refer [deftest is]] 4 | [bosquet.template.selmer :refer [clear-gen-var-slot]])) 5 | 6 | (deftest clearing-after-gen-slot 7 | (is (= "" (clear-gen-var-slot "{{x}}" :x))) 8 | (is (= "" (clear-gen-var-slot "{{x}} = 10" :x))) 9 | (is (= "" (clear-gen-var-slot "{{x}} = 10" :x))) 10 | (is (= "{{x}} = " (clear-gen-var-slot "{{x}} = {{y}}" :y))) 11 | (is (= "{{x}}\n=\n" (clear-gen-var-slot "{{x}}\n=\n{{y}}" :y))) 12 | (is (= "{{o/x}}\n=" (clear-gen-var-slot "{{o/x}}\n={{o/y}} DONE" :o/y)))) 13 | -------------------------------------------------------------------------------- /demo/play-writer-prompt.edn: -------------------------------------------------------------------------------- 1 | [[:system "You are an amazing writer."] 2 | [:user ["Write a synopsis for the play:" 3 | "Title: {{title}}" 4 | "Genre: {{genre}}" 5 | "Synopsis:"]] 6 | [:assistant #:llm{:service :openai 7 | :model-params {:model :gpt-4 :temperature 0.8 :max-tokens 120} 8 | :var-name :synopsis}] 9 | [:user "Now write a critique of the above synopsis:"] 10 | [:assistant #:llm{:service :mistral 11 | :model-params {:model :mistral-small :temperature 0.2 :max-tokens 120} 12 | :var-name :critique}]] 13 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | !.clj-kondo/config.edn 2 | *.class 3 | *.iml 4 | *.jar 5 | *.log 6 | *.swp 7 | *~ 8 | .DS_Store 9 | .cache 10 | .calva/output-window/ 11 | .classpath 12 | .clerk 13 | .clj-kondo 14 | .cpcache 15 | .eastwood 16 | .factorypath 17 | .hg/ 18 | .hgignore 19 | .idea/ 20 | .java-version 21 | .lein-* 22 | .lsp/.cache 23 | .lsp/sqlite.db 24 | .nrepl-history 25 | .nrepl-port 26 | .portal 27 | .project 28 | .projectile-cache.eld 29 | .rebel_readline_history 30 | .settings 31 | .socket-repl-port 32 | .sw* 33 | .vscode 34 | /checkouts 35 | /classes 36 | /target 37 | bllm 38 | config.edn 39 | data 40 | models 41 | qdrant_storage 42 | secrets.edn 43 | -------------------------------------------------------------------------------- /test/bosquet/llm/gen_data_test.clj: -------------------------------------------------------------------------------- 1 | (ns bosquet.llm.gen-data-test 2 | (:require 3 | [bosquet.llm.gen-data :refer [reduce-gen-graph total-usage]] 4 | [clojure.test :refer [deftest is]])) 5 | 6 | (deftest usage-aggregation 7 | (is (= {:total 15 :completion 12 :prompt 3} 8 | (total-usage 9 | {:x {:total 10 :completion 8 :prompt 2} 10 | :y {:total 5 :completion 4 :prompt 1}})))) 11 | 12 | (deftest gen-graph-reduction 13 | (is (= {:y 100} 14 | 
(reduce-gen-graph (fn [m k _v] (assoc m k 100)) 15 | {:x "do not change me" 16 | :y {:llm/gen 10}})))) 17 | -------------------------------------------------------------------------------- /src/bosquet/db/cache.clj: -------------------------------------------------------------------------------- 1 | (ns bosquet.db.cache 2 | (:require 3 | [clojure.core.cache.wrapped :as w])) 4 | 5 | (defn ->cache [] 6 | (w/fifo-cache-factory {})) 7 | 8 | (def cache (->cache)) 9 | 10 | (defn evict 11 | [props] 12 | (w/evict cache props)) 13 | 14 | (defn evict-all 15 | [] 16 | (doseq [k (keys @cache)] 17 | (evict k))) 18 | 19 | (defn lookup-or-call 20 | "Call `gen-fn` function with `properties` that are used as 21 | a key for cache. Same props (model params and context) will hit 22 | the cache" 23 | [gen-fn properties] 24 | (w/lookup-or-miss cache properties 25 | (fn [_item] (gen-fn properties)))) 26 | -------------------------------------------------------------------------------- /src/bosquet/tool/weather.clj: -------------------------------------------------------------------------------- 1 | (ns bosquet.tool.weather 2 | (:require 3 | [clojure.string :as str] 4 | [taoensso.timbre :as timbre])) 5 | 6 | (defn ^{:desc "Get the current weather in a given location"} get-current-weather 7 | [^{:type "string" :desc "The city, e.g. Vilnius"} location] 8 | (timbre/infof "Applying get-current-weather for location %s" location) 9 | (case (str/lower-case location) 10 | "vilnius" {:location "Vilnius" :temperature "24" :unit "celcius"} 11 | "tokyo" {:location "Tokyo" :temperature "30" :unit "celcius"} 12 | "paris" {:location "Paris" :temperature "27" :unit "celcius"} 13 | {:location location :temperature "unknown"})) 14 | -------------------------------------------------------------------------------- /test/bosquet/template/read_test.clj: -------------------------------------------------------------------------------- 1 | (ns bosquet.template.read-test 2 | (:require 3 | [bosquet.llm.wkk :as wkk] 4 | [bosquet.template.read :refer [data-slots]] 5 | [clojure.test :refer [deftest is]])) 6 | 7 | (deftest data-slots-test 8 | (is (= #{:x :y} (data-slots "{{x}} + {{y}}"))) 9 | (is #{:v1 :v2} 10 | (data-slots [[:user "ab {{v1}}"] 11 | [:assistant {:llm :test wkk/var-name :ref1}] 12 | [:user ["1" "2 {{v2}} {{ref1}}"]]])) 13 | (is (= #{:v1} (data-slots {:x "aa" :y "bb {{v1}}" :z "{{y}}"}))) 14 | (is (= #{:data1 :data2} (data-slots {:q "{{data1}} {{data2}} = {{a}}" 15 | :a {:llm :oai}})))) 16 | -------------------------------------------------------------------------------- /src/bosquet/tool/math.clj: -------------------------------------------------------------------------------- 1 | (ns bosquet.tool.math 2 | (:require 3 | [taoensso.timbre :as timbre])) 4 | 5 | (defn ^{:desc "add 'x' and 'y'"} add 6 | [^{:type "number" :desc "First number to add"} x 7 | ^{:type "number" :desc "Second number to add"} y] 8 | (timbre/infof "Applying add for %s %s" x y) 9 | (+ (if (number? x) x (Float/parseFloat x)) 10 | (if (number? y) y (Float/parseFloat y)))) 11 | 12 | (defn ^{:desc "subtract 'y' from 'x'"} sub 13 | [^{:type "number" :desc "Number to subtract from"} x 14 | ^{:type "number" :desc "Number to subtract"} y] 15 | (timbre/infof "Applying sub for %s %s" x y) 16 | (- (if (number? x) x (Float/parseFloat x)) 17 | (if (number? 
y) y (Float/parseFloat y)))) 18 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: zmedelis 4 | patreon: # Replace with a single Patreon username 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: # Replace with a single Ko-fi username 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | otechie: # Replace with a single Otechie username 12 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry 13 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] 14 | -------------------------------------------------------------------------------- /notebook/examples/rag.clj: -------------------------------------------------------------------------------- 1 | (ns examples.rag 2 | (:require 3 | [bosquet.llm.openai-tokens :as tokenizer] 4 | [bosquet.system :as system] 5 | [bosquet.wkk :as wkk] 6 | [clojure.string :as string] 7 | [hfds-clj.core :as hfds])) 8 | 9 | 10 | (def fiqa-dataset 11 | (hfds/load-dataset {:dataset "explodinggradients/fiqa" 12 | :config "corpus" 13 | :split "corpus"})) 14 | 15 | (defn ds->text 16 | [ds] 17 | (string/join "\n" (map :doc ds))) 18 | 19 | (def ds-token-count (tokenizer/token-count (ds->text fiqa-dataset) :text-embedding-ada-002)) 20 | 21 | ;; 1. Create embeddings 22 | ;; 2. Create RAG pipeline 23 | ;; 3. Use RAGA to evaluate the quality 24 | 25 | (defn ds->text 26 | [ds] 27 | (tokenizer/embeddings-price-estimate 28 | (string/join "\n" (map :doc ds)))) 29 | 30 | (def mem (system/get-memory wkk/long-term-embeddings-memory)) 31 | -------------------------------------------------------------------------------- /src/bosquet/nlp/similarity.clj: -------------------------------------------------------------------------------- 1 | (ns bosquet.nlp.similarity 2 | (:import [org.apache.commons.text.similarity 3 | CosineDistance JaccardDistance JaroWinklerDistance])) 4 | 5 | (defn cosine-distance [s1 s2] 6 | (.apply (CosineDistance.) s1 s2)) 7 | 8 | (defn jackard-distance [s1 s2] 9 | (.apply (JaccardDistance.) s1 s2)) 10 | 11 | (defn jaro-winkler-distance [s1 s2] 12 | (.apply (JaroWinklerDistance.) 
s1 s2)) 13 | 14 | (comment 15 | (def s1 "I want to explore the potential of integrating LLMs with external knowledge") 16 | (def s2 "Let's explore how LLMs can integrate with external knowledge") 17 | (def s3 "I want to explore the city") 18 | (cosine-distance s1 s2) 19 | (cosine-distance s1 s3) 20 | 21 | (jackard-distance s1 s2) 22 | (jackard-distance s1 s3) 23 | 24 | (jaro-winkler-distance s1 s2) 25 | (jaro-winkler-distance s1 s3) 26 | (jaro-winkler-distance s2 s3) 27 | #__) 28 | -------------------------------------------------------------------------------- /test/bosquet/memory/retrieval_test.clj: -------------------------------------------------------------------------------- 1 | (ns bosquet.memory.retrieval-test 2 | (:require 3 | [bosquet.llm.wkk :as wkk] 4 | [bosquet.memory.retrieval :as r] 5 | [bosquet.memory.simple-memory :as simple-memory] 6 | [clojure.test :refer [deftest is]])) 7 | 8 | (deftest memory-object-size-test 9 | (is (= 1 (r/memory-object-size "A" "gpt-4" wkk/openai))) 10 | (is (= 6 (r/memory-object-size "Call me Ishmael." "gpt-3.5-turbo" wkk/openai)))) 11 | 12 | (defn- memorize [] 13 | (let [mem (simple-memory/->remember)] 14 | (simple-memory/forget) 15 | (doseq [m ["One monkey" "Two monkeys" "Three monkeys" 16 | "Four monkeys" "Five monkeys"]] 17 | (mem nil m)))) 18 | 19 | (deftest cue-retrieval 20 | (memorize) 21 | ((simple-memory/->remember) nil "10 jumping donkeys") 22 | (is (= ["Four monkeys"] 23 | ((simple-memory/->cue-memory) 24 | {r/content-similarity-threshold 0.01} 25 | "Four monkeys")))) 26 | -------------------------------------------------------------------------------- /src/bosquet/llm/gen_data.clj: -------------------------------------------------------------------------------- 1 | (ns bosquet.llm.gen-data 2 | "All things concerning generation data: 3 | * converting inputs and outputs 4 | * usage tokens calculations 5 | * etc") 6 | 7 | (defn total-usage 8 | "Calculate aggreage token usage across all ai generation nodes" 9 | [usages] 10 | (reduce-kv 11 | (fn [{:keys [prompt completion total] 12 | :or {prompt 0 completion 0 total 0} 13 | :as aggr} 14 | _k 15 | {p :prompt c :completion t :total}] 16 | (assoc aggr 17 | :prompt (+ (or p 0) prompt) 18 | :completion (+ (or c 0) completion) 19 | :total (+ (or t 0) total))) 20 | {} 21 | usages)) 22 | 23 | (defn reduce-gen-graph 24 | "Reduce generation prompt map, where a node is representing llm generation spec 25 | call `gen-node-fn`" 26 | [gen-node-fn prompt-map] 27 | (reduce-kv 28 | (fn [m k v] 29 | (if (map? 
v) (gen-node-fn m k v) m)) 30 | {} prompt-map)) 31 | -------------------------------------------------------------------------------- /src/bosquet/llm/wkk.clj: -------------------------------------------------------------------------------- 1 | (ns bosquet.llm.wkk) 2 | 3 | ;; # Well Known Keys 4 | 5 | (def model :llm/model) 6 | 7 | (def service :llm/service) 8 | 9 | (def content :llm/content) 10 | 11 | (def usage :llm/usage) 12 | 13 | (def generation-type :llm/type) 14 | 15 | (def openai :openai) 16 | 17 | (def localai :localai) 18 | 19 | (def cohere :cohere) 20 | 21 | (def lmstudio :lmstudio) 22 | 23 | (def mistral :mistral) 24 | 25 | (def ollama :ollama) 26 | 27 | (def model-params :llm/model-params) 28 | (def chat-fn :chat-fn) 29 | (def complete-fn :complete-fn) 30 | (def embed-fn :embed-fn) 31 | (def var-name :llm/var-name) 32 | 33 | (def output-format 34 | "Type of generation output format: json, xml, text, etc" 35 | :llm/output-format) 36 | 37 | (def cache 38 | "LLM call parameter specifying if cached results should be used" 39 | :llm/cache) 40 | 41 | (def fun-impl 42 | :fun/impl) 43 | 44 | (def fun-args 45 | :fun/args) 46 | 47 | (def tools 48 | :llm/tools) 49 | -------------------------------------------------------------------------------- /resources/prompt-palette/problem-solvers.edn: -------------------------------------------------------------------------------- 1 | {:problem-solver/basic-qna 2 | "Problem: {{prompt-example/problem}} 3 | Solution: {{prompt-example/solution}} 4 | Problem: {{completion/problem}} 5 | Solution: {% llm-generate %}" 6 | 7 | :problem-solver/cot 8 | "Problem: {{prompt-example/problem}} 9 | Solution: {{prompt-example/cot}} {{prompt-example/solution}} 10 | Problem: {{completion/problem}} 11 | {% llm-generate %}" 12 | 13 | 14 | :problem-solver/zero-shot 15 | "{{prompt/complete-for}} 16 | Let's think step by step. 17 | {{aigen/complete}} 18 | Therefore, the answer (arabic numerals) is {% llm-generate %}"} 19 | 20 | ;; "Basic Problem-Solution prompt giving one `problem` and its ;; 21 | ;; `solution` example. Optional `context` to start with setting the topic." 22 | ;; 23 | ;; "[Chain-of-Thought Prompting Elicits Reasoning in Large Language Models](https://arxiv.org/pdf/2201.11903.pdf) ;; 24 | 25 | ;; Good for: 26 | ;; - arithmetic 27 | ;; - commonsense 28 | ;; - symbolic reasoning." 
29 | -------------------------------------------------------------------------------- /src/bosquet/llm/claude.clj: -------------------------------------------------------------------------------- 1 | (ns bosquet.llm.claude 2 | (:require 3 | [bosquet.env :as env] 4 | [bosquet.llm.http :as http] 5 | [bosquet.llm.wkk :as wkk] 6 | [bosquet.llm.schema :as schema] 7 | [bosquet.utils :as u])) 8 | 9 | (defn usage->canonical 10 | [{:keys [input_tokens output_tokens]}] 11 | {schema/usage-in-count input_tokens 12 | schema/usage-out-count output_tokens 13 | schema/usage-total-count (+ output_tokens input_tokens)}) 14 | 15 | (defn- header [key] 16 | {:headers {"x-api-key" key 17 | "anthropic-version" "2023-06-01"}}) 18 | 19 | (defn messages 20 | ([params] (messages (wkk/openai env/config) params)) 21 | ([{key :api-key url :api-endpoint} params] 22 | (u/log-call url params) 23 | (let [{:keys [content usage]} (http/resilient-post (str url "/messages") (header key) params)] 24 | {wkk/generation-type :chat 25 | wkk/content {:role :assistant 26 | :content (-> content last :text)} 27 | wkk/usage (usage->canonical usage)}))) 28 | -------------------------------------------------------------------------------- /test/bosquet/llm/cohere_test.clj: -------------------------------------------------------------------------------- 1 | (ns bosquet.llm.cohere-test 2 | (:require 3 | [bosquet.llm.cohere :refer [complete chatml->cohere]] 4 | [bosquet.llm.wkk :as wkk] 5 | [cohere.client :as client] 6 | [clojure.test :refer [deftest is]])) 7 | 8 | (deftest complete-test 9 | (is (= {wkk/generation-type :completion 10 | wkk/content {:completion "5"} 11 | wkk/usage {:prompt 7 :completion 1 :total 8}} 12 | (with-redefs [client/generate 13 | (fn [_] 14 | {:generations [{:text "5"}] 15 | :meta {:billed_units {:input_tokens 7 16 | :output_tokens 1}}})] 17 | (complete {:prompt "2 + 2 ="}))))) 18 | 19 | (deftest chatml->cohere-test 20 | (is (= [{:user_name :a :text "2 + 2 ="} 21 | {:user_name :b :text "4"} 22 | {:user_name :a :text "thanks!"}] 23 | (chatml->cohere 24 | [{:role :a :content "2 + 2 ="} 25 | {:role :b :content "4"} 26 | {:role :a :content "thanks!"}])))) 27 | -------------------------------------------------------------------------------- /notebook/lc_presentation.clj: -------------------------------------------------------------------------------- 1 | (ns lc-presentation) 2 | 3 | ;; #### Prompt template 4 | ;; Initial version of the prompt template included a custom Selmer tag to trigger definitions: 5 | ;; ``` 6 | ;; Question: \{{Q}} 7 | ;; Answer: {% gen var-name=answer model=gpt-4 temperature=0.2 %} 8 | ;; ``` 9 | ;; This was abandoned in favor of less Selmer customization and ability to define LLM calls externally 10 | ;; ```edn 11 | ;; {:q1 ["Q: When I was {{age}} my sister was half my age." 12 | ;; "Now I’m 70 how old is my sister? A: {{a}}"] 13 | ;; :a {:llm/service :ollama :llm/model-params {:model :llama2})} 14 | ;; ``` 15 | ;; 16 | ;; More in /notebook/user_guide -> Define tempaltes 17 | ;; 18 | ;; #### Environment setup 19 | ;; Why a simple call like this works 20 | ;; ``` 21 | ;; (gen "xxx") 22 | ;; ``` 23 | ;; Bosquet adds a default llm config turning this into a 24 | ;; ```edn 25 | ;; {:prompt "xxx {{gen}}" 26 | ;; :gen :DEFAULT-LLM} 27 | ;; ``` 28 | ;; The initial solution was based on Integrant but that was too cumbersome and hard to extend. 
29 | ;; The defaults are defined like this: `env.edn` 30 | ;; OR (and this I learned from the early lib users) people want to pass in custom definitions: 31 | -------------------------------------------------------------------------------- /src/bosquet/agent/tool.clj: -------------------------------------------------------------------------------- 1 | (ns bosquet.agent.tool 2 | (:require [taoensso.timbre :as timbre])) 3 | 4 | (defprotocol Tool 5 | (my-name [this]) 6 | (search [this ctx]) 7 | (lookup [this ctx]) 8 | (finish [this ctx])) 9 | 10 | (defn call-tool [agent action ctx] 11 | (condp = action 12 | :search (let [result (search agent ctx)] 13 | {:lookup-db [[0 true result]] 14 | :lookup-index 0}) 15 | :lookup (lookup agent ctx) 16 | :finish (finish agent ctx))) 17 | 18 | ;; 19 | ;; Logging tool/agent thinking/acting 20 | ;; 21 | 22 | (defn print-indexed-step [action plan step] 23 | (timbre/info (format "%s: %s" (name action) step)) 24 | (timbre/info plan)) 25 | 26 | (defn print-action [action parameters step] 27 | (timbre/info "\nAct: " step) 28 | (timbre/info "- Action: " (name action)) 29 | (timbre/info "- Parameters: " parameters)) 30 | 31 | (defn print-thought [plan content] 32 | (timbre/info (str "\n" plan ":")) 33 | (timbre/info content)) 34 | 35 | (defn print-result [result] 36 | (timbre/info "Agent found the solution: " result)) 37 | 38 | (defn print-too-much-thinking-error [steps] 39 | (timbre/info 40 | (format "\nAgent was thinking for %s steps and failed to find a solution" steps))) 41 | -------------------------------------------------------------------------------- /src/bosquet/llm/schema.clj: -------------------------------------------------------------------------------- 1 | (ns bosquet.llm.schema) 2 | 3 | ;; Place to define Malli schemas and conversions between different LLM 4 | ;; shapes 5 | ;; 6 | ;; LLM input and output data transformations. 7 | ;; a) Change Bosquet data: prompts, chat messages, usage and other elements 8 | ;; into whatever data shape is used be target LLM service 9 | ;; b) Change LLM service responces: generations, usage data, etc into a single 10 | ;; representation used by Bosquet 11 | 12 | (defn model-mapping 13 | "Check LLM service config if there are any aliases defined. 14 | If model alias is found return it, if not use the `model` as is. 15 | 16 | Intended for usecases where templates define a certain model name and 17 | without changes in the template a differently named by other provider 18 | can be used." 19 | [{model-map :model-name-mapping} model] 20 | (get model-map model model)) 21 | 22 | (def usage-out-count 23 | "Usage map key to indicate how many tokens were used for completion" 24 | :completion) 25 | 26 | (def usage-in-count 27 | "Usage map key to indicate how many tokens were used for prompt" 28 | :prompt) 29 | 30 | (def usage-total-count 31 | "Usage map key to indicate how many tokens were used for prompt and completion" 32 | :total) 33 | -------------------------------------------------------------------------------- /notebook/document_loading.clj: -------------------------------------------------------------------------------- 1 | ;; ## Document Loading 2 | ;; 3 | ;; 4 | ;; Bosquet uses [Apache Tika](https://tika.apache.org/) for document parsing. Tika supports various document types: 5 | ;; PDF, MS Office, and OpenOffice are but a few in the [complete list](https://tika.apache.org/2.9.1/formats.html) 6 | ;; Document reading using Tika is done in the `bosquet.read.document` namespace. 
7 | 8 | (ns document-loading 9 | (:require 10 | [bosquet.read.document :as d])) 11 | 12 | ;; The `parse` function is wrapping Tika API. It accepts either a file name or an input stream of the document and returns a map containing two keys: 13 | ;; * **text** - with the extracted document text 14 | ;; * **metadata** - extracted document metadata, containing various entries depending on how the document was created: title, create date, authors, etc. Some of the metadata entries will be conveniently presented using the [Dublin Core](https://en.wikipedia.org/wiki/Dublin_Core) format 15 | 16 | 17 | ;; ### Few examples of document parsing: 18 | ;; 19 | ;; #### PDF document 20 | 21 | ^{:nextjournal.clerk/auto-expand-results? true} 22 | (d/parse "data/memory.pdf") 23 | 24 | ;; #### MS Excel document 25 | 26 | ^{:nextjournal.clerk/auto-expand-results? true} 27 | (d/parse "data/netflix.xls") 28 | -------------------------------------------------------------------------------- /notebook/helpers.clj: -------------------------------------------------------------------------------- 1 | (ns helpers 2 | (:require 3 | [clojure.string :as string] 4 | [nextjournal.clerk :as clerk])) 5 | 6 | (defn wrap-in-el 7 | "Wrap collection of HTML elements in another HTML el. 8 | 9 | For example `el = :ul` and `coll = [[:li 1] [:li 2]] 10 | => 11 | `[:ul [:li 1] [:li 2]]`" 12 | [el coll] 13 | (vec (cons el coll))) 14 | 15 | (defn kv-cell [k v] 16 | [:div.pb-2 17 | [:div [:em (str k ":")]] 18 | [:div v]]) 19 | 20 | (defn chatml-cell [{:keys [role content]}] 21 | (kv-cell (string/capitalize (name role)) content)) 22 | 23 | (defn join [& lines] 24 | (apply str (interpose "\n" lines))) 25 | 26 | (defn text-div [text] 27 | (clerk/html [:div text])) 28 | 29 | (defn text-list [coll] 30 | (when (seq coll) 31 | (clerk/html 32 | (wrap-in-el 33 | :ul.list-disc 34 | (->> coll 35 | (remove string/blank?) 36 | (mapv #(vector :li %))))))) 37 | 38 | (defn card-list 39 | [items] 40 | (clerk/html 41 | (vec 42 | (cons 43 | :div.font-mono 44 | (map 45 | (fn [item] 46 | [:div.block.p-6.bg-white.border.border-gray-200.rounded-lg.shadow.hover:bg-gray-100.dark:bg-gray-800.dark:border-gray-700.dark:hover:bg-gray-700.grid.grid-cols-1.gap-3 47 | [:div.flex 48 | [:div item]]]) 49 | items))))) 50 | -------------------------------------------------------------------------------- /test/bosquet/llm/openai_tokens_test.clj: -------------------------------------------------------------------------------- 1 | (ns bosquet.llm.openai-tokens-test 2 | (:require 3 | [clojure.test :refer [deftest is]] 4 | [bosquet.llm.openai-tokens :as tok])) 5 | 6 | (deftest encoding-decoding 7 | (let [txt "A screaming comes across the sky." 8 | tokens (tok/encode txt :gpt-3.5-turbo)] 9 | (is (= txt (tok/decode tokens :gpt-3.5-turbo))))) 10 | 11 | (deftest price-estimation 12 | (is (= (+ (* 7 0.003)) 13 | (tok/generation-price-estimate 14 | "A screaming comes across the sky." 15 | :gpt-4))) 16 | (is (= (+ (* 7 0.003) (* 15 0.006)) 17 | (tok/generation-price-estimate 18 | "A screaming comes across the sky." 19 | "It has happened before, but there is nothing to compare it to now." 20 | :gpt-4))) 21 | (is (= (+ (* 10 0.003) (* 20 0.006)) 22 | (tok/generation-price-estimate 10 20 :gpt-4))) 23 | (is (= (* 0.0001 1000) (tok/embeddings-price-estimate 24 | ;; make 1k tokens; 'abc' = 1tok 25 | (apply str (take 1000 (repeat "abc")))))) 26 | (is (= (* 0.0001 1000) (tok/embeddings-price-estimate 1000)))) 27 | 28 | (deftest max-tokens 29 | (is (tok/fits-in-context-window? 
1 :text-babbage-002)) 30 | (is (tok/fits-in-context-window? 2049 :text-babbage-002)) 31 | (is (not (tok/fits-in-context-window? 20000 :text-babbage-002)))) 32 | -------------------------------------------------------------------------------- /src/bosquet/wkk.clj: -------------------------------------------------------------------------------- 1 | (ns bosquet.wkk) 2 | 3 | ;; 4 | ;; Well Known Keys to refer to various concepts properties, 5 | ;; system components, etc. 6 | ;; 7 | 8 | (def service 9 | "Key to reference LLM service name in gen call parameters" 10 | :bosquet.llm/service) 11 | 12 | (def model-parameters 13 | "Key to reference LLM model parameters in gen call parameters" 14 | :bosquet.llm.model/parameters) 15 | 16 | (def output-format 17 | "Type of generation output format: json, xml, text, etc" 18 | :bosquet.llm.output/format) 19 | 20 | (def cache 21 | :bosquet.llm/cache) 22 | 23 | ;; 24 | ;; Memory 25 | ;; 26 | 27 | (def memory-config 28 | "Key to reference memory in configuration" 29 | :bosquet.memory/config) 30 | 31 | (def recall-parameters 32 | "Memory parameters to be used when creating and retrieving a memory" 33 | :bosquet.recall/parameters) 34 | 35 | (def memory-system 36 | "Memory system implementing memory/Memory protocol to be used in gen AI workflow" 37 | :bosquet.memory/system) 38 | 39 | (def recall-function 40 | "Function to retrieve memory using passed in memory type" 41 | :bosquet.recall/function) 42 | 43 | ;; 44 | ;; Misc shortcuts 45 | ;; 46 | (def oai-service :llm/openai) 47 | 48 | (def gpt3.5-turbo-with-cache 49 | {service oai-service 50 | cache true 51 | model-parameters {:model :gpt-3.5-turbo}}) 52 | 53 | (def gpt4-turbo-with-cache 54 | {service oai-service 55 | cache true 56 | model-parameters {:model :gpt-4-1106-preview}}) 57 | -------------------------------------------------------------------------------- /src/bosquet/memory/simple_memory.clj: -------------------------------------------------------------------------------- 1 | (ns bosquet.memory.simple-memory 2 | (:require 3 | [bosquet.llm.wkk :as wkk] 4 | [bosquet.memory.retrieval :as r] 5 | [bosquet.nlp.similarity :as nlp])) 6 | 7 | (def memory-store 8 | "This type of mem is mainly for dev purposes. Expose the atom for easy debuging." 9 | (atom [])) 10 | 11 | (defn forget 12 | "Clear memory contents" 13 | [] 14 | (reset! memory-store [])) 15 | 16 | (defn- retrieve-in-sequnce 17 | "WIP. Candidate for `retrieval` ns to be reused accross memory systems" 18 | [{object-limit r/memory-objects-limit 19 | token-limit r/memory-tokens-limit 20 | :as params} memories] 21 | (cond->> memories 22 | object-limit (take-last object-limit) 23 | token-limit (r/take-while-tokens 24 | (merge {wkk/model :gpt-3.5-turbo 25 | wkk/service wkk/openai} 26 | params)))) 27 | 28 | (defn ->cue-memory 29 | [] 30 | (fn [{mem-content-fn r/memory-content 31 | threshold r/content-similarity-threshold 32 | :or {threshold 1 33 | mem-content-fn identity} 34 | :as params} 35 | cue] 36 | (retrieve-in-sequnce 37 | params 38 | (filter #(> threshold (nlp/cosine-distance cue (mem-content-fn %))) 39 | @memory-store)))) 40 | 41 | (defn ->remember 42 | [] 43 | (fn [_opts observation] 44 | (doseq [item (if (sequential? observation) observation [observation])] 45 | (swap! 
memory-store conj item)))) 46 | -------------------------------------------------------------------------------- /src/bosquet/mcp/client.clj: -------------------------------------------------------------------------------- 1 | (ns bosquet.mcp.client 2 | "MCP client that spawns and communicates with MCP server processes" 3 | (:require [bosquet.mcp.transport :refer [send-request send-notification close]] 4 | [bosquet.mcp.stdio-transport :as stdio-transport])) 5 | 6 | (defn create-transport 7 | "Create appropriate transport based on config. 8 | Automatically starts processes for stdio transport." 9 | [{:keys [type] :as config}] 10 | (case (or type :stdio) 11 | :stdio (stdio-transport/create-stdio-transport config) 12 | :http (throw (ex-info "http: Not yet supported" {:type type :config config})) 13 | (throw (ex-info "Unknown transport type" {:type type :config config})))) 14 | 15 | (defn initialize 16 | "Initialize MCP connection" 17 | [transport] 18 | (let [response (send-request transport "initialize" 19 | {:protocolVersion "2024-11-05" 20 | :capabilities {:tools {}} 21 | :clientInfo {:name "bosquet-mcp" :version "1.0.0"}})] 22 | (send-notification transport "notifications/initialized" {}) 23 | response)) 24 | 25 | (defn list-tools 26 | "List available tools" 27 | [transport] 28 | (get-in (send-request transport "tools/list" {}) [:result :tools])) 29 | 30 | (defn call-tool 31 | "Call a tool" 32 | [transport tool-name arguments] 33 | (get-in (send-request transport "tools/call" 34 | {:name tool-name :arguments arguments}) 35 | [:result :content])) 36 | 37 | (defn shutdown 38 | "Shutdown MCP connection" 39 | [transport] 40 | (close transport)) 41 | -------------------------------------------------------------------------------- /src/bosquet/llm/localai.clj: -------------------------------------------------------------------------------- 1 | (ns bosquet.llm.localai 2 | (:require 3 | [bosquet.env :as env] 4 | [bosquet.llm.oai-shaped-llm :as oai] 5 | [bosquet.llm.wkk :as wkk] 6 | [bosquet.utils :as u] 7 | [wkok.openai-clojure.api :as api] 8 | [net.modulolotus.truegrit.circuit-breaker :as cb])) 9 | 10 | (def chat* 11 | "Run 'chat' type completion. Pass in `messages` in ChatML format." 12 | (cb/wrap (fn [{url :api-endpoint default-params :model-params :as service-cfg} params] 13 | (u/log-call url params) 14 | (-> params 15 | (oai/prep-params default-params) 16 | (api/create-chat-completion service-cfg) 17 | oai/->completion)) 18 | u/rest-service-cb)) 19 | 20 | (defn chat [params] 21 | (chat* (wkk/localai env/config) params)) 22 | 23 | (def complete* 24 | "Run 'completion' type generation. 25 | `params` needs to have `prompt` key. 26 | 27 | *Deprecated* by OAI?" 28 | (cb/wrap (fn [{url :api-endpoint default-params :model-params :as service-cfg} params] 29 | (u/log-call url params) 30 | (-> params 31 | (oai/prep-params default-params) 32 | (api/create-completion service-cfg) 33 | oai/->completion)) 34 | u/rest-service-cb)) 35 | 36 | (defn complete [params] 37 | (complete* (wkk/localai env/config) params)) 38 | 39 | (comment 40 | (chat {:messages [{:role :user :content "2/2="}]}) 41 | (complete {:prompt "2+2=" wkk/model-params {:model :phi-4}}) 42 | (complete {:prompt "HOw are you doing?" 
wkk/model-params {:model :phi-4}}) 43 | #__) 44 | -------------------------------------------------------------------------------- /src/bosquet/llm/http.clj: -------------------------------------------------------------------------------- 1 | (ns bosquet.llm.http 2 | (:require [clj-http.client :as client] 3 | [bosquet.utils :as u] 4 | [taoensso.timbre :as timbre] 5 | [net.modulolotus.truegrit.circuit-breaker :as cb])) 6 | 7 | (defn use-local-proxy 8 | "Use local proxy to log LLM API requests" 9 | ([] (use-local-proxy "localhost" 8080 "changeit")) 10 | ([host port password] 11 | (System/setProperty "javax.net.ssl.trustStore" (str (System/getProperty "user.home") "/.bosquet/keystore")) 12 | (System/setProperty "javax.net.ssl.trustStorePassword" password) 13 | (System/setProperty "https.proxyHost" host) 14 | (System/setProperty "https.proxyPort" (str port)))) 15 | 16 | (defn post 17 | ([url params] (post url nil params)) 18 | ([url http-opts params] 19 | (u/log-call url params) 20 | (try 21 | (let [request (merge {:content-type :json 22 | :accept :json 23 | :body (->> params u/snake-case u/write-json)} 24 | http-opts) 25 | response (client/post url request)] 26 | (-> response :body (u/read-json))) 27 | (catch Exception e 28 | (.printStackTrace e) 29 | (let [{:keys [body status]} (ex-data e) 30 | {:keys [message error]} (u/read-json body)] 31 | (timbre/error "Call failed") 32 | (timbre/errorf "- HTTP status '%s'" status) 33 | (timbre/errorf "- Error message '%s'" (or message error))))))) 34 | 35 | (def resilient-post* 36 | (cb/wrap (fn [& args] 37 | (apply post args)) 38 | u/rest-service-cb)) 39 | 40 | (defn resilient-post [& args] 41 | (apply resilient-post* args)) 42 | 43 | -------------------------------------------------------------------------------- /test/bosquet/memory/simple_memory_test.clj: -------------------------------------------------------------------------------- 1 | (ns bosquet.memory.simple-memory-test 2 | (:require 3 | [bosquet.memory.retrieval :as r] 4 | [bosquet.memory.simple-memory :as m] 5 | [clojure.test :as t])) 6 | 7 | #_(t/deftest simple-memory-operations 8 | (let [mem (m/->remember)] 9 | (m/forget) 10 | (mem nil "1") 11 | (mem nil ["2" "3" "4" "5"]) 12 | ;; no limits specified return all 13 | (t/is (= ["1" "2" "3" "4" "5"] (.sequential-recall mem {r/memory-content identity}))) 14 | ;; last 3 objects returned, no token limit 15 | (t/is (= ["3" "4" "5"] (.sequential-recall mem {r/memory-content identity 16 | r/memory-objects-limit 3}))) 17 | ;; object and token limitation 18 | (t/is (= ["4" "5"] (.sequential-recall mem {r/memory-objects-limit 3 19 | r/memory-tokens-limit 3 20 | r/memory-content identity}))) 21 | (t/is (= ["3" "4" "5"] (.sequential-recall mem {r/memory-objects-limit 3 22 | r/memory-tokens-limit 1000 23 | r/memory-content identity}))))) 24 | 25 | (t/deftest cue-recall 26 | (let [mem (m/->remember) 27 | cue (m/->cue-memory) 28 | sim-params {r/content-similarity-threshold 0.3}] 29 | (m/forget) 30 | (mem nil ["This is a car" "This is a bar" "The sky is dark" "Dark is the sky"]) 31 | (t/is (= ["This is a car" "This is a bar"] 32 | (cue sim-params "This is a fox"))) 33 | (t/is (empty? 
(cue sim-params "Underground policemen's union"))))) 34 | -------------------------------------------------------------------------------- /src/bosquet/llm/embeddings.clj: -------------------------------------------------------------------------------- 1 | (ns bosquet.llm.embeddings 2 | (:require 3 | [bosquet.env :as env] 4 | [bosquet.memory.encoding :as encoding] 5 | [wkok.openai-clojure.api :as api])) 6 | 7 | (def ^:private openai-embedding-model "text-embedding-ada-002") 8 | 9 | (defn oai-embeddings [text opts] 10 | (api/create-embedding {:model openai-embedding-model 11 | :input text} 12 | opts)) 13 | 14 | (deftype 15 | OAIEmbeddings 16 | [opts] 17 | encoding/Encoder 18 | (encode [_this text] (oai-embeddings text opts))) 19 | 20 | (comment 21 | (require '[bosquet.db.qdrant :as qd]) 22 | 23 | (def embeddings-collection-config 24 | {:vectors-size 1536 25 | :vectors-distance :Dot}) 26 | 27 | (def qd-coll-name "test-embs") 28 | (qd/create-collection qd-coll-name embeddings-collection-config) 29 | 30 | (def oai-emb (OAIEmbeddings. (:openai env/config))) 31 | (def texts ["Hello world" 32 | "Hello town" 33 | "Goodmorning fields" 34 | "Cars are driving on the road"]) 35 | (def embeds (mapv (fn [text] 36 | {:payload {:text text} 37 | :embedding 38 | (-> oai-emb (.encode text) :data first :embedding)}) 39 | texts)) 40 | 41 | (qd/add-docs qd-coll-name nil embeds) 42 | 43 | (def query (-> oai-emb (.encode "Cars in town") :data first :embedding)) 44 | 45 | (qd/search qd-coll-name query 2) 46 | 47 | ;; Same but via Memory component 48 | 49 | (import 'bosquet.db.qdrant.Qdrant) 50 | 51 | (def qd (Qdrant. (:qdrant env/config))) 52 | 53 | (.create qd qd-coll-name) 54 | (.add qd qd-coll-name embeds) 55 | (.search qd qd-coll-name query 2) 56 | 57 | #__) 58 | -------------------------------------------------------------------------------- /demo/tree-prompt.edn: -------------------------------------------------------------------------------- 1 | {:calc ["Lets solve math problems." 2 | "Answer only with calculated result. Abstain from explanations or rephrasing the task!" 3 | "You are given the values:" 4 | "A = {{a}}; B = {{b}}; C = {{c}}" 5 | "Solve the following equations:" 6 | "{{tasks}}" 7 | "{{grade}}"] 8 | :p1 "A + B = {{x}}" 9 | :p2 "A - B = {{y}}" 10 | :p3 "({{x}} + {{y}}) / C = {{z}}" 11 | :tasks ["{{p1}}" "{{p2}}" "{{p3}}"] 12 | ; Those two can run in parallel 13 | :eval1-role ["Evaluate if the solutions to the above equations are correct" 14 | "{{eval1}}"] 15 | :eval2-role ["Evaluate if the solutions to the above equations are calulated optimaly" 16 | "{{eval2}}"] 17 | ; Maybe an agent will want to add another evaluator if those two dissagree a lot 18 | :grade ["Evaluation A: {{eval1-role}}" 19 | "Evaluation B: {{eval2-role}}" 20 | "Based on this work grade (from 1 to 10) student's math knowledge." 21 | "Give only grade number like '7' abstain from further explanations." 
22 | "{{score}}"] 23 | 24 | :x #:llm{:service :mistral :model-params {:model :mistral-small :max-tokens 50}} 25 | :y #:llm{:service :mistral :model-params {:model :mistral-small :max-tokens 50}} 26 | :z #:llm{:service :mistral :model-params {:model :mistral-small :max-tokens 50}} 27 | :eval1 #:llm{:service :openai :model-params {:model :gpt-4 :max-tokens 100}} 28 | :eval2 #:llm{:service :openai :model-params {:model :gpt-4 :max-tokens 100}} 29 | :score #:llm{:service :openai 30 | :model-params {:model :gpt-3.5-turbo :max-tokens 2} 31 | :output-format :number ; NOTE: output format spec 32 | }} 33 | -------------------------------------------------------------------------------- /src/bosquet/agent/agent_mind_reader.clj: -------------------------------------------------------------------------------- 1 | (ns bosquet.agent.agent-mind-reader 2 | (:require 3 | [clojure.string :as string])) 4 | 5 | (defn- normalize-action 6 | "Normalize `action` name to be used as a key to indicate what kind 7 | of action is requested." 8 | [action] 9 | (-> action string/lower-case string/trim keyword)) 10 | 11 | (defn- action-re 12 | "Regex to find the action in the agent's mind when it is in a `cycle`" 13 | [cycle] 14 | (re-pattern 15 | ;; 'Observation' at the end is optional because there will be none when 'Action=Finish' 16 | (format "(?s).*?(Thought %s:.*?)(Action %s:(.*?)\\[(.*?)\\])(\\nObservation %s:)?" 17 | cycle cycle cycle))) 18 | 19 | (defn find-action 20 | "Read agent's thoughts and actions. Find the action in its `cycle` of thinking." 21 | [step agent-mind] 22 | (let [[_ thought action action-verb action-param] (re-find (action-re step) agent-mind)] 23 | {:thought (string/trim (str thought action)) 24 | :action (normalize-action action-verb) 25 | :parameters (string/trim action-param)})) 26 | 27 | (defn split-sentences 28 | "Split `text` into sentences." 29 | [text] 30 | ;; Naive regex based implementation 31 | (string/split text #"(?s)(?<=[^A-Z].[.?])\s+(?=[A-Z])")) 32 | 33 | (defn lookup-index 34 | "Construct a `query` lookup index for the `content`. 35 | It will return a seqence of triplets. 36 | ``` 37 | [sentence-index has-query sentence] 38 | ``` 39 | where `sentence-index` is the index of the sentence in the `content`, 40 | `has-query` is a boolean indicating if the sentence contains the `query`, 41 | `sentence` is the sentence itself." 42 | [query content] 43 | (vec 44 | (map-indexed 45 | (fn [idx sentence] 46 | [idx 47 | (string/includes? 
(string/lower-case sentence) (string/lower-case query)) 48 | sentence]) 49 | (split-sentences content)))) 50 | -------------------------------------------------------------------------------- /notebook/index.clj: -------------------------------------------------------------------------------- 1 | ^{:nextjournal.clerk/visibility :hide} 2 | (ns ^:nextjournal.clerk/no-cache index 3 | (:require [nextjournal.clerk :as clerk])) 4 | 5 | (clerk/html 6 | [:div.viewer-markdown 7 | [:ul 8 | [:li [:a.underline {:href (clerk/doc-url "notebook/user_guide/index.html")} "User guide"]] 9 | [:li [:a.underline {:href (clerk/doc-url "notebook/text_splitting/index.html")} "Text Chunking"]] 10 | [:li [:a.underline {:href (clerk/doc-url "notebook/memory_prosocial_dialog/index.html")} "Long and short-term memory use"]] 11 | [:li [:a.underline {:href (clerk/doc-url "notebook/document_loading/index.html")} "Document Loading"]] 12 | [:li [:a.underline {:href (clerk/doc-url "notebook/observability/index.html")} "Observability"]] 13 | [:li 14 | [:div "Examples"] 15 | [:ul 16 | [:li [:a.underline {:href (clerk/doc-url "notebook/examples/math_generate_code/index.html")} "Math calc with generated code"]] 17 | [:li [:a.underline {:href (clerk/doc-url "notebook/examples/writing_letters/index.html")} "Writing letters"]]]] 18 | [:li 19 | [:div "Paper Implementations"] 20 | [:ul 21 | [:li [:a.underline {:href (clerk/doc-url "notebook/papers/chain_of_density/index.html")} "Chain of Density"]] 22 | [:li [:a.underline {:href (clerk/doc-url "notebook/papers/chain_of_verification/index.html")} "Chain of Verification"]]]] 23 | [:li 24 | [:div "Presentations"] 25 | [:ul 26 | [:li [:a.underline {:href "https://clojureverse.org/t/scicloj-llm-meetup-3-llmops-with-bosquet-summary-recording/"} "2023-06-17: LLMOps with Bosquet (Scicloj)"]] 27 | [:li [:a.underline {:href "https://clojureverse.org/t/scicloj-llm-meetup-6-implementing-research-papers-with-bosquet-summary-recording/"} "2023-11-17: Implementing research papers with Bosquet (Scicloj)"]] 28 | [:li [:a.underline {:href "https://www.youtube.com/watch?v=ywlNGiD9gCg"} "Bosquet LLM command line interface and observability tools"]]] 29 | ]]]) 30 | -------------------------------------------------------------------------------- /test/bosquet/llm/oai_shaped_llm_test.clj: -------------------------------------------------------------------------------- 1 | (ns bosquet.llm.oai-shaped-llm-test 2 | (:require 3 | [bosquet.llm.oai-shaped-llm :as oai] 4 | [bosquet.llm.wkk :as wkk] 5 | [clojure.test :refer [deftest is]])) 6 | 7 | (deftest prep-params-test 8 | (is (= {} 9 | (oai/prep-params {} {}))) 10 | (is (= {:max-tokens 10} 11 | (oai/prep-params {:max-tokens 10}))) 12 | (is (= {:cache true 13 | :model :gpt-10 14 | :max-tokens 1} 15 | (oai/prep-params 16 | {wkk/model-params {:model :gpt-10 :max-tokens 1}} 17 | {wkk/model-params {:model :gpt-100} 18 | :cache true})))) 19 | 20 | (deftest completion-normalization 21 | (let [txt "Hello there, how may I assist you today?" 
22 | usage-in {:prompt_tokens 5 :completion_tokens 7 :total_tokens 12} 23 | usage-out {:prompt 5 :completion 7 :total 12}] 24 | (is (= {wkk/content {oai/role oai/assistant oai/content txt} 25 | wkk/usage usage-out 26 | wkk/generation-type :chat} 27 | (oai/->completion {:model "gpt-3.5-turbo" 28 | :object "chat.completion" 29 | :choices [{:index 0 30 | :message {:role "assistant" :content txt} 31 | :finish_reason "stop"}] 32 | :usage usage-in}))) 33 | (is (= {wkk/content txt 34 | wkk/usage usage-out 35 | wkk/generation-type :completion} 36 | (oai/->completion {:object "text_completion" 37 | :model "gpt-3.5-turbo" 38 | :choices [{:text txt 39 | :index 0 40 | :logprobs nil 41 | :finish_reason "length"}] 42 | :usage usage-in}))))) 43 | -------------------------------------------------------------------------------- /notebook/examples/tree_prompts.clj: -------------------------------------------------------------------------------- 1 | (ns examples.tree-prompts 2 | (:require 3 | [bosquet.llm.generator :refer [llm generate]] 4 | [bosquet.llm.wkk :as wkk])) 5 | 6 | ;; TODO 7 | 8 | (def solver (llm :openai wkk/model-params {:model :gpt-4 :max-tokens 50})) 9 | 10 | (def g {:calc ["Lets solve math problems." 11 | "Answer only with calculated result. Abstain from explanations or rephrasing the task!" 12 | "You are given the values:" 13 | "A = {{a}}; B = {{b}}; C = {{c}}" 14 | "Solve the following equations:" 15 | "{{tasks}}" 16 | "{{grade}}"] 17 | :tasks ["{{p1}}" "{{p2}}" "{{p3}}"] 18 | :p1 "A + B = {{x}}" 19 | :p2 "A - B = {{y}}" 20 | :p3 "({{x}} + {{y}}) / C = {{z}}" 21 | :eval1-role ["{{tasks}}" 22 | "Evaluate if the solutions to the above equations are correct" 23 | "{{eval1}}"] 24 | :eval2-role ["{{tasks}}" 25 | "Evaluate if the solutions to the above equations are calulated optimaly" 26 | "{{eval2}}"] 27 | :grade ["Based on the following evaluations to math problems:" 28 | "Evaluation A: {{eval1-role}}" 29 | "Evaluation B: {{eval2-role}}" 30 | "Based on this work grade (from 1 to 10) student's math knowledge." 31 | "Give only grade number like '7' abstain from further explanations." 32 | "{{score}}"] 33 | :x solver 34 | :y solver 35 | :z solver 36 | :eval1 (llm :mistral wkk/model-params {:model :mistral-small :max-tokens 50}) 37 | :eval2 (llm :mistral wkk/model-params {:model :mistral-small :max-tokens 50}) 38 | :score (llm :openai 39 | wkk/output-format :number 40 | wkk/model-params {:model :gpt-4 :max-tokens 2})}) 41 | 42 | (generate g {:a 5 :b 2 :c 1}) 43 | -------------------------------------------------------------------------------- /test/bosquet/converter_test.clj: -------------------------------------------------------------------------------- 1 | (ns bosquet.converter-test 2 | (:require 3 | [bosquet.converter :as c] 4 | [bosquet.utils :as u] 5 | [clojure.test :refer [deftest is testing]])) 6 | 7 | (deftest converting-lists 8 | (testing "numbered lists and edge cases" 9 | (is (= ["foo" "bar" "baz"] 10 | (c/list-reader "1. foo\n2. bar\n3. baz"))) 11 | (is (= ["foo1.1" "bar" "baz"] 12 | (c/list-reader "1. foo1.1 \n2. bar\n3. baz"))) 13 | (is (= ["foo" "bar" "baz"] 14 | (c/list-reader "\n\n1. foo\n2. bar\n3. baz")))) 15 | (testing "numbered unordered lists" 16 | (is (= ["foo" "bar" "baz"] 17 | (c/list-reader "* foo\n* bar\n* baz"))) 18 | (is (= ["foo" "bar" "baz"] 19 | (c/list-reader "- foo\n- bar\n- baz"))))) 20 | 21 | (deftest converting-yes-and-noes 22 | (is (true? (c/->bool "yes"))) 23 | (is (true? (c/->bool "YES"))) 24 | (is (false? (c/->bool " nO "))) 25 | (is (false? 
(c/->bool "NO"))) 26 | (is (= "X" (c/->bool "X"))) 27 | 28 | (is (true? (c/->bool "tRue"))) 29 | (is (false? (c/->bool " FALSE")))) 30 | 31 | (deftest coerce-test 32 | (is (= "Dogs are great!" (c/coerce nil "Dogs are great!"))) 33 | (is (= "Dogs are great!" (c/coerce :pdf "Dogs are great!"))) 34 | (is (= [{"x" 1.2 "y" 0.8}] (c/coerce :json "[{\"x\" : 1.2, \"y\" : 0.8}]"))) 35 | (is (= ["1" "2"] (c/coerce #(vec (re-seq #"\d+" %)) "Answer is 1 and 2")))) 36 | 37 | (deftest reading-edn 38 | (is (= [1 2] (c/edn-reader "[1 2]"))) 39 | (is (= [1 2] (c/edn-reader (u/join-lines "```clojure" "[1 2]" "```")))) 40 | (is (= :edn (c/edn-reader (u/join-lines "```edn" ":edn" "```"))))) 41 | 42 | (deftest reading-json 43 | (is (= [1 2] (c/json-reader "[1, 2]"))) 44 | (is (= {"a" 2} (c/json-reader (u/join-lines "Junk\nmore of it\n```json" "{\"a\" : 2}" "```\njunk")))) 45 | (is (= 1 (c/json-reader (u/join-lines "```json" "1" "```"))))) 46 | 47 | (deftest converting-numbers 48 | (is (= "x" (c/->number "x"))) 49 | (is (= 1 (c/->number "1"))) 50 | (is (= 1.1 (c/->number "1.1"))) 51 | (is (= 101.09 (c/->number "101.09"))) 52 | (is (= 0.1 (c/coerce :number "0.100")))) 53 | 54 | (deftest failing-conversions 55 | (is (= "[1" (c/coerce :json "[1")))) 56 | -------------------------------------------------------------------------------- /src/bosquet/converter.clj: -------------------------------------------------------------------------------- 1 | (ns bosquet.converter 2 | (:require 3 | [clojure.string :as s] 4 | [jsonista.core :as j] 5 | [taoensso.timbre :as timbre])) 6 | 7 | ;; WIP - a place to start building output conversion functions 8 | 9 | (defn- drop-digit [item] 10 | (s/trim (s/replace-first item #"^((\d+\.)|\-|\*) " ""))) 11 | 12 | (defn list-reader 13 | "Converts numbered item list given as a new line 14 | separated string to a list 15 | 16 | 1. foo 17 | 2. bar 18 | 3. baz 19 | 20 | '-' and '*' works for unordered lists 21 | => 22 | [\"foo\" \"bar\" \"baz\"]" 23 | [items] 24 | (map drop-digit 25 | (s/split (s/trim items) #"\n"))) 26 | 27 | (defn ->bool 28 | "Converts yes/no answer to boolean 29 | 30 | Yes => true 31 | NO => false" 32 | [answer] 33 | (condp = (-> answer s/trim s/lower-case) 34 | "yes" true 35 | "true" true 36 | "no" false 37 | "false" false 38 | answer)) 39 | 40 | (defn ->number 41 | [num] 42 | (cond 43 | (re-matches #"\d+" num) (Integer/parseInt num) 44 | (re-matches #"\d+(\.\d+)?" num) (Double/parseDouble num) 45 | :else num)) 46 | 47 | (defn json-reader 48 | "Some models (GPT-3.5-*, Cohere) tend to wrap response with Makrdown code 49 | ```json 50 | GOOD JSON CONTENT 51 | ``` 52 | Strip that markdown" 53 | [completion] 54 | (-> completion 55 | (s/replace #"(?ms).*?```json" "") 56 | (s/replace #"(?ms)```" "") 57 | (j/read-value))) 58 | 59 | (defn edn-reader 60 | "GPT-3.5-* tends to wrap response with Makrdown code 61 | ```edn OR clojure 62 | GOOD EDN CONTENT 63 | ``` 64 | Strip that markdown 65 | " 66 | [completion] 67 | (-> completion 68 | (s/replace #"(?ms)^```(edn|clojure)" "") 69 | (s/replace #"(?ms)```$" "") 70 | (read-string))) 71 | 72 | (defn coerce 73 | [format completion] 74 | (try 75 | (if (fn? format) 76 | (format completion) 77 | (condp = format 78 | :json (json-reader completion) 79 | :edn (edn-reader completion) 80 | :list (list-reader completion) 81 | :number (->number completion) 82 | :bool (->bool completion) 83 | completion)) 84 | (catch Exception e 85 | (timbre/error (format "Coercion error '%s'. 
Returning generated data withouth coercion" (.getMessage e))) 86 | completion))) 87 | -------------------------------------------------------------------------------- /src/bosquet/mcp/stdio_transport.clj: -------------------------------------------------------------------------------- 1 | (ns bosquet.mcp.stdio-transport 2 | (:require [clojure.java.io :as io] 3 | [jsonista.core :as json] 4 | [taoensso.timbre :as timbre] 5 | [bosquet.mcp.transport :refer [MCPTransport]]) 6 | (:import [java.io BufferedReader BufferedWriter])) 7 | 8 | (def mapper (json/object-mapper {:decode-key-fn true})) 9 | 10 | (defn- start-process 11 | "Start a subprocess for stdio communication" 12 | [{:keys [command args env]}] 13 | (let [pb (ProcessBuilder. (into-array String (cons command args)))] 14 | (when env 15 | (let [process-env (.environment pb)] 16 | (doseq [[k v] env] 17 | (.put process-env (str k) (str v))))) 18 | (.start pb))) 19 | 20 | (defrecord StdioTransport [process] 21 | MCPTransport 22 | (send-request [_ method params] 23 | (let [request {:jsonrpc "2.0" 24 | :id (System/currentTimeMillis) 25 | :method method 26 | :params (or params {})} 27 | ^BufferedWriter writer (io/writer (.getOutputStream process)) 28 | ^BufferedReader reader (io/reader (.getInputStream process))] 29 | 30 | (let [request-json (json/write-value-as-string request)] 31 | (timbre/debug "STDIO →" request-json) 32 | (.write writer request-json) 33 | (.newLine writer) 34 | (.flush writer)) 35 | 36 | (let [response-json (.readLine reader) 37 | response (json/read-value response-json mapper)] 38 | (timbre/debug "STDIO ←" response-json) 39 | response))) 40 | 41 | (send-notification [_ method params] 42 | (let [notification {:jsonrpc "2.0" 43 | :method method 44 | :params (or params {})} 45 | ^BufferedWriter writer (io/writer (.getOutputStream process)) 46 | notification-json (json/write-value-as-string notification)] 47 | (timbre/debug "STDIO → (notification)" notification-json) 48 | (.write writer notification-json) 49 | (.newLine writer) 50 | (.flush writer))) 51 | 52 | (close [_] 53 | (.destroy process))) 54 | 55 | (defn create-stdio-transport 56 | "Create a stdio transport (starts the process)" 57 | [config] 58 | (->StdioTransport (start-process config))) 59 | -------------------------------------------------------------------------------- /src/bosquet/template/read.clj: -------------------------------------------------------------------------------- 1 | (ns bosquet.template.read 2 | (:require 3 | [bosquet.llm.wkk :as wkk] 4 | [bosquet.template.selmer :as selmer] 5 | [bosquet.utils :as u] 6 | [clojure.edn :as edn] 7 | [clojure.java.io :as io] 8 | [clojure.set :as set] 9 | [clojure.string :as string] 10 | [taoensso.timbre :as timbre])) 11 | 12 | (defn read-edn [reader] 13 | (edn/read (java.io.PushbackReader. reader))) 14 | 15 | (defn load-prompt-palette-edn [file] 16 | (timbre/info "Read prompts from: " (.getName file)) 17 | (with-open [rdr (io/reader file)] 18 | (reduce-kv (fn [m k v] (assoc m k 19 | (if (sequential? v) (u/join-coll v) v))) 20 | {} (read-edn rdr)))) 21 | 22 | (defn- edn-file? [file] (string/ends-with? (.getName file) ".edn")) 23 | 24 | (defn load-palettes 25 | "Build a map of all the prompt palletes defined in `dir`. 26 | It will read all EDN files in that dir and construct mapping 27 | where key is file name and content is patterns defined in that file." 28 | [dir] 29 | (->> (io/file dir) 30 | (file-seq) 31 | (filter edn-file?) 
32 | (reduce 33 | (fn [m file] (merge m (load-prompt-palette-edn file))) 34 | {}))) 35 | 36 | (defn data-slots 37 | "Extract data slots that are defined in the template, chat, or graph context. 38 | This will ignore all the self references and generation slots, 39 | only return slots that are suplied as data and not defined in prompts." 40 | [tpl-chat-or-graph] 41 | ;; Different processing is needed for map based graph prompts 42 | ;; and chats 43 | (let [templates (cond 44 | (string? tpl-chat-or-graph) [tpl-chat-or-graph] 45 | (map? tpl-chat-or-graph) (->> tpl-chat-or-graph vals (map u/join-coll)) 46 | (vector? tpl-chat-or-graph) (map (fn [[_ content]] 47 | (u/join-coll content)) tpl-chat-or-graph)) 48 | non-data-refs (set (cond 49 | (map? tpl-chat-or-graph) (keys tpl-chat-or-graph) 50 | (vector? tpl-chat-or-graph) (map (fn [[_ content]] 51 | (when (map? content) (wkk/var-name content))) 52 | tpl-chat-or-graph))) 53 | slots (selmer/known-variables templates)] 54 | (set/difference slots non-data-refs))) 55 | -------------------------------------------------------------------------------- /src/bosquet/agent/wikipedia.clj: -------------------------------------------------------------------------------- 1 | (ns bosquet.agent.wikipedia 2 | (:require 3 | [bosquet.agent.tool :as t] 4 | [jsonista.core :as j] 5 | [clj-http.client :as http])) 6 | 7 | (defn- read-json [json] 8 | (j/read-value json (j/object-mapper {:decode-key-fn true}))) 9 | 10 | (defn call-wiki [params] 11 | (->> (http/request 12 | {:method :get 13 | :url "https://en.wikipedia.org/w/api.php" 14 | :query-params params}) 15 | :body read-json)) 16 | 17 | (defn search-wiki-titles 18 | "Searh Wikipedia for `query` and return a vector of tuples `[title link]`" 19 | [query] 20 | ;; Wikipedia API returns a vector of 4 items: 21 | ;; 1. query 22 | ;; 2. titles of matching articlesA 23 | ;; 3. short descriptions of matching articles (?) 24 | ;; 4. links to matching articles 25 | ;; We only care about the second and last items. 
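  ;; A hypothetical opensearch response (shape only, not real data) for the query "Haskell":
  ;;   ["Haskell"
  ;;    ["Haskell" "Haskell (programming language)" "Haskell Curry"]
  ;;    ["" "" ""]
  ;;    ["https://en.wikipedia.org/wiki/Haskell" "..." "..."]]
  ;; `second` below therefore picks out the vector of matching titles.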
26 | (second (call-wiki {"search" query 27 | "limit" 3 28 | "action" "opensearch"}))) 29 | 30 | (defn fetch-page 31 | ([title] (fetch-page title {})) 32 | ([title {:keys [n-sentences format] 33 | :or {n-sentences 5 format "json"}}] 34 | (-> (call-wiki 35 | {:action "query" 36 | :titles title 37 | :prop "extracts" 38 | :exsentences n-sentences 39 | :explaintext "yes" 40 | :exintro "yes" 41 | :format format}) 42 | :query :pages vec first second :extract))) 43 | 44 | (defn best-match 45 | "`query` is a string used to search Wikipedia in `search-wiki` call 46 | `results` is a vector of tuples `[title link]` 47 | 48 | Best match is determined by the following criteria: 49 | - If there is exact match between `query` and `title`, return it 50 | - Otherwise return the first result (trusting Wikipedia's search algorithm)" 51 | [query results] 52 | (if-let [exact-match (get (set results) query)] 53 | exact-match 54 | (first results))) 55 | 56 | (defn extract-page-content [query] 57 | (fetch-page 58 | (best-match 59 | query 60 | (search-wiki-titles query)))) 61 | 62 | (deftype Wikipedia 63 | [] t/Tool 64 | (my-name [_this] "Wikipedia") 65 | (search [_this {query :parameters}] 66 | (extract-page-content query)) 67 | (lookup [_this _ctx]) 68 | (finish [_this {query :parameters}] 69 | query)) 70 | 71 | (comment 72 | (def w (Wikipedia.)) 73 | (t/my-name w) 74 | (t/search w {:parameters "David Chanoff"}) 75 | #__) 76 | -------------------------------------------------------------------------------- /src/bosquet/read/document.clj: -------------------------------------------------------------------------------- 1 | (ns bosquet.read.document 2 | "Document reader wrapping Apache Tika parser 3 | https://tika.apache.org/ 4 | 5 | Tika supports lots of different file formats 6 | https://tika.apache.org/2.9.1/formats.html 7 | 8 | `parse` function will use Tika capabilities to convert provided 9 | document data to a map containing `text` and `metadata` fields. 10 | 11 | ```clojure 12 | (parse (clojure.java.io/input-stream \"data/memory.pdf\")) 13 | => 14 | {:text \"Memory, reasoning, and categorization: parallels and\ncommon mechanisms\nBrett K. ...\" 15 | :metadata {:dc:creator \"Brett K. Hayes\" 16 | :dc:description \"Traditionally, memory, reasoning, and categorization have been \"} 17 | ```" 18 | (:require 19 | [clojure.java.io :as io]) 20 | (:import 21 | [org.apache.tika.metadata Metadata] 22 | [org.apache.tika.parser AutoDetectParser] 23 | [org.apache.tika.sax BodyContentHandler])) 24 | 25 | (defn- extract-metadata 26 | "Convert tika Metadata object into plain map." 27 | [metadata] 28 | (reduce (fn [m k] 29 | (assoc m (keyword k) 30 | (if (.isMultiValued metadata k) 31 | (into [] (.getValues metadata k)) 32 | (.get metadata k)))) 33 | {} 34 | (.names metadata))) 35 | 36 | (defn- body-content-handler 37 | [] 38 | (let [;; BodyContentHandler will process only specified number of characters, 39 | ;; this is a guard against parsing huge files note, that it is file content chars 40 | ;; so in cases of binary files it is not your letters 41 | ;; -1 means no limit 42 | file-char-limit -1] 43 | (BodyContentHandler. file-char-limit))) 44 | 45 | (defn parse 46 | "Extract text and metadata from `doc-input-stream`. The stream can contain 47 | and data of a file formats supported by Tika. File format detection will be 48 | done automatcaly by Tika. 
49 | 50 | Returns a map with 51 | - `text` field containing document in a plain text format 52 | - `metadata` Dublin Core defined metadata fields if document has those defined" 53 | [stream-or-file-name] 54 | (let [input (if (string? stream-or-file-name) 55 | (io/input-stream stream-or-file-name) 56 | stream-or-file-name) 57 | parser (AutoDetectParser.) 58 | handler (body-content-handler) 59 | metadata (Metadata.)] 60 | (.parse parser input handler metadata) 61 | {:text (.toString handler) 62 | :metadata (extract-metadata metadata)})) 63 | -------------------------------------------------------------------------------- /dev/user.clj: -------------------------------------------------------------------------------- 1 | (ns user 2 | #_{:clj-kondo/ignore [:unused-namespace]} 3 | (:require 4 | [clojure.string :as string] 5 | [nextjournal.clerk :as clerk] 6 | [portal.api :as p] 7 | [taoensso.timbre :as timbre])) 8 | 9 | (defn log-output-fn 10 | [data] 11 | (let [{:keys [level ?err ?ns-str ?file timestamp_ ?line output-opts]} data 12 | context (format "%s %s [%s:%3s]:" 13 | (force timestamp_) 14 | (-> level name string/upper-case) 15 | (or ?ns-str ?file "?") (or ?line "?"))] 16 | (format 17 | "%-42s %s%s" 18 | context 19 | (if-let [msg-fn (get output-opts :msg-fn timbre/default-output-msg-fn)] 20 | (msg-fn data) "") 21 | (if ?err 22 | ((get output-opts :error-fn timbre/default-output-error-fn) data) "")))) 23 | 24 | 25 | (timbre/merge-config! {:output-fn log-output-fn 26 | :timestamp-opts {:pattern "HH:mm:ss"}}) 27 | 28 | (defn build-static-docs 29 | [_] 30 | (clerk/build! {:paths ["notebook/user_guide.clj" 31 | "notebook/text_splitting.clj" 32 | "notebook/document_loading.clj" 33 | "notebook/observability.clj" 34 | "notebook/examples/math_generate_code.clj" 35 | "notebook/examples/writing_letters.clj" 36 | "notebook/memory_prosocial_dialog.clj" 37 | "notebook/papers/chain_of_density.clj" 38 | "notebook/papers/chain_of_verification.clj"] 39 | :index "notebook/index.clj" 40 | :out-path "docs"})) 41 | 42 | (defn open-portal [] 43 | (p/open) 44 | (add-tap #'p/submit)) 45 | 46 | 47 | (defn clear-portal [] 48 | (p/clear)) 49 | 50 | (comment 51 | (open-portal) 52 | 53 | (clerk/serve! {:watch-paths ["notebook"]}) 54 | 55 | (clerk/serve! {:browse? false}) 56 | 57 | (clerk/show! "notebook/getting_started.clj") 58 | (clerk/show! "notebook/text_splitting.clj") 59 | (clerk/show! "notebook/document_loading.clj") 60 | (clerk/show! "notebook/using_llms.clj") 61 | (clerk/show! "notebook/examples/short_memory_prosocial_dialog.clj") 62 | (clerk/show! "notebook/papers/chain_of_verification.clj") 63 | (clerk/show! "notebook/configuration.clj") 64 | (clerk/show! "notebook/user_guide.clj") 65 | (clerk/show! "notebook/chat_with_memory.clj") 66 | (clerk/show! "notebook/text_analyzers.clj") 67 | (clerk/show! "notebook/wedding_guest_example.clj") 68 | (clerk/show! 
"notebook/named_entity_processing.clj") 69 | #__) 70 | -------------------------------------------------------------------------------- /test/bosquet/nlp/splitter_test.clj: -------------------------------------------------------------------------------- 1 | (ns bosquet.nlp.splitter-test 2 | (:require 3 | [bosquet.nlp.splitter :as split] 4 | [clojure.string :as string] 5 | [clojure.test :refer [deftest is]])) 6 | 7 | (deftest splitting-by-tokens 8 | (is (= ["Think not, is my eleventh commandment;" 9 | "ment; and sleep when you can, is my" 10 | " is my twelfth."] 11 | (split/chunk-text 12 | {split/chunk-size 10 13 | split/overlap 2 14 | split/split-unit split/token 15 | split/model :gpt-4} 16 | "Think not, is my eleventh commandment; and sleep when you can, is my twelfth.")))) 17 | 18 | (deftest splitting-by-characters 19 | (is (= ["Never attempt to win by force " 20 | "what can be won by deception"] 21 | (split/chunk-text 22 | {split/chunk-size 30 split/split-unit split/character} 23 | "Never attempt to win by force what can be won by deception"))) 24 | (is (= ["Never attempt to win by force " 25 | "e what can be won by deception" 26 | "on"] 27 | (split/chunk-text 28 | {split/chunk-size 30 split/overlap 2 split/split-unit split/character} 29 | "Never attempt to win by force what can be won by deception")))) 30 | 31 | (deftest splitting-by-sentence 32 | (with-redefs [split/split-handlers 33 | (assoc-in split/split-handlers 34 | [split/sentence :encode] 35 | (fn [_ txt] 36 | (mapv 37 | (fn [snt] (str (string/trim snt) ".")) 38 | (string/split txt #"\."))))] 39 | (let [text (str 40 | "Jenny lost keys. Panic rises. Frantic search begins." " " 41 | "Couch cushions invaded. Discovery: in pocket.")] 42 | (is (= ["Jenny lost keys. Panic rises. Frantic search begins." 43 | "Couch cushions invaded. Discovery: in pocket."] 44 | (split/chunk-text {split/chunk-size 3 split/split-unit split/sentence} text))) 45 | (is (= ["Jenny lost keys. Panic rises. Frantic search begins." 46 | "Frantic search begins. Couch cushions invaded. Discovery: in pocket." 47 | "Discovery: in pocket."] 48 | (split/chunk-text 49 | {split/chunk-size 3 split/overlap 1 split/split-unit split/sentence} 50 | text))) 51 | (is (= ["Jenny lost keys. Panic rises. Frantic search begins. Couch cushions invaded. Discovery: in pocket."] 52 | (split/chunk-text {split/chunk-size 30 split/split-unit split/sentence} text)))))) 53 | -------------------------------------------------------------------------------- /notebook/examples/function_nodes.clj: -------------------------------------------------------------------------------- 1 | (ns examples.function-nodes 2 | (:require 3 | [bosquet.llm.generator :as g] 4 | [bosquet.llm.wkk :as wkk])) 5 | 6 | ;; Bosquet prompts are defined in a map, where relationship between 7 | ;; prompt components are resolved when constructing the output. 8 | ;; 9 | ;; An entry in a map with `:llm/service` value defines an LLM call that will 10 | ;; recieve a context from already resolved text in the prompt map. 11 | 12 | {:repeat "Repeat 'X' {{number}} times: {{repeater}}" 13 | :repeater #:llm{:service :mistral 14 | :model-params {:model :mistral-small}}} 15 | 16 | ;; The concept of calling an LLM is further abstracted into defining any 17 | ;; function call. This allows the integration of any data extraction 18 | ;; functionality into the prompting flow. 
19 | 20 | {:rnumber #:fun{:impl (g/fun (fn [n] (rand-int n)) ['n]) 21 | :args ['n]} 22 | :repeat "Repeat 'X' {{number}} times: {{repeater}}" 23 | :repeater #:llm{:service :mistral 24 | :model-params {:model :mistral-small}}} 25 | 26 | ;; The function call definition requires two values: 27 | ;; - a function itself under `fun` key 28 | ;; - arguments to that function under `args` key, 29 | ;; 30 | ;; `generator` namespace has helper functions to define those nodes 31 | 32 | (g/generate 33 | {:repeat "Repeat 'X' {{number}} times: {{repeater}}" 34 | :number (g/fun (fn [n] (rand-int n)) ['n]) 35 | :repeater (g/llm :mistral-small)} 36 | {:n 5}) 37 | 38 | ;; An example is where function and llm invocation nodes use the data they produce. 39 | 40 | (g/generate 41 | {:format "EDN" 42 | :astronomer ["As a brilliant astronomer, list distances between planets and the Sun" 43 | "in the Solar System. Provide the answer in {{format}} map where the key is the" 44 | "planet name and the value is the number of the distance in millions of kilometers." 45 | "Generate only {{format}} omit any other prose and explanations." 46 | "{{distances}}" 47 | "Based on the distances data we know that the average min and max distances are:" 48 | "{{analysis}}" 49 | ] 50 | :distances (g/llm :gpt-4 51 | wkk/cache true 52 | wkk/output-format :edn 53 | wkk/model-params {:max-tokens 300 :model :gpt-4}) 54 | :analysis (g/fun (fn [d] 55 | [(-> d vals min) (-> d vals max)]) 56 | ['distances]) 57 | #_ (llm wkk/mistral 58 | wkk/model-params {:model :mistral-small})}) 59 | -------------------------------------------------------------------------------- /src/bosquet/llm/openai.clj: -------------------------------------------------------------------------------- 1 | (ns bosquet.llm.openai 2 | (:require 3 | [bosquet.env :as env] 4 | [bosquet.llm.oai-shaped-llm :as oai] 5 | [bosquet.llm.wkk :as wkk] 6 | [bosquet.utils :as u] 7 | [wkok.openai-clojure.api :as api] 8 | [bosquet.llm.tools :as tools] 9 | [net.modulolotus.truegrit.circuit-breaker :as cb])) 10 | 11 | (defn chat* 12 | [service-cfg params] 13 | (let [tools (map tools/tool->function (wkk/tools params)) 14 | tool-defs (wkk/tools params) 15 | gen-fn (cb/wrap (fn [{url :api-endpoint default-params :model-params :as service-cfg} params] 16 | (u/log-call url params) 17 | (let [params (cond-> params 18 | true (oai/prep-params default-params) 19 | (not-empty tools) (assoc :tools tools))] 20 | (-> params 21 | (api/create-chat-completion service-cfg)))) 22 | u/rest-service-cb)] 23 | (-> (gen-fn service-cfg params) 24 | (tools/apply-tools wkk/openai params tool-defs (partial gen-fn service-cfg)) 25 | oai/->completion))) 26 | 27 | (defn chat 28 | "Run 'chat' type completion. Pass in `messages` in ChatML format." 29 | ([service-cfg params] (chat* service-cfg params)) 30 | ([params] (chat (wkk/openai env/config) params))) 31 | 32 | (def complete* 33 | "Run 'completion' type generation. `params` needs to have `prompt` key." 
34 | (cb/wrap (fn [{url :api-endpoint default-params :model-params :as service-cfg} params] 35 | (u/log-call url params) 36 | (-> params 37 | (oai/prep-params default-params) 38 | (api/create-completion service-cfg) 39 | oai/->completion)) 40 | u/rest-service-cb)) 41 | 42 | (defn complete 43 | ([service-cfg params] (complete* service-cfg params)) 44 | ([params] (complete (wkk/openai env/config) params))) 45 | 46 | (comment 47 | (require '[bosquet.tool.math :refer [add sub]] 48 | '[bosquet.tool.weather :refer [get-current-weather]]) 49 | (tools/tool->function #'get-current-weather) 50 | (tools/tool->function #'add) 51 | (tools/tool->function #'sub) 52 | (chat {:messages [{:role :user :content "2/2="}]}) 53 | (chat {:messages [{:role :user :content "Whats 2 plus 2 minus 3"}] wkk/tools [#'add #'sub]}) 54 | (chat {:messages [{:role :user :content "Whats 2 plus 2"}] wkk/tools [#'add #'sub]}) 55 | (chat {:messages [{:role :user :content "what is the current weather in san francisco?"}] 56 | wkk/tools [#'get-current-weather]}) 57 | #__) 58 | 59 | 60 | -------------------------------------------------------------------------------- /bb.edn: -------------------------------------------------------------------------------- 1 | {:paths ["src" "test" "dev"] 2 | :tasks {:init (do (def standalone-jar-file "target/antq-standalone.jar") 3 | (def jar-file "target/antq.jar")) 4 | rebel (clojure "-M:dev:rebel") 5 | outdated (clojure "-M:outdated") 6 | test:watch (do (clojure "-M:outdated") 7 | (clojure "-M:kaocha:test")) 8 | lint (do (shell "echo cljstyle") ;; I don't have clj-style 9 | (shell "clj-kondo --lint src:test")) 10 | format (shell "cljfmt check") 11 | format:fix (shell "cljfmt fix") 12 | pom (clojure "-Spom") 13 | clean (shell "rm -rf .cpcache target") 14 | deploy (clojure "-T:build" "build/deploy") 15 | uber (clojure "-T:build" "uber") 16 | docs (do (shell "rm -rf docs") 17 | (clojure "-X:dev user/build-static-docs")) 18 | mitproxy:keystore (do 19 | (let [bsq-dir (str (System/getProperty "user.home") "/.bosquet") 20 | mtp-dir (str (System/getProperty "user.home") "/.mitmproxy")] 21 | (shell (str "mkdir -p " bsq-dir)) 22 | (shell (format "keytool -import -alias mitproxy -keystore %s/keystore -file %s/mitmproxy-ca-cert.pem -storepass changeit" 23 | bsq-dir mtp-dir)))) 24 | native:build (let [project (-> (clojure.edn/read-string (slurp "deps.edn")) :aliases :neil :project)] 25 | (shell 26 | "native-image" 27 | "--report-unsupported-elements-at-runtime" 28 | "--features=clj_easy.graal_build_time.InitClojureClasses" 29 | "--initialize-at-run-time=org.apache.http.impl.auth.NTLMEngineImpl" 30 | "--trace-object-instantiation=java.lang.Thread" 31 | "--initialize-at-build-time" 32 | "-O1" 33 | "-H:+UnlockExperimentalVMOptions" 34 | "-H:+AllowDeprecatedBuilderClassesOnImageClasspath" 35 | "-jar" (format "target/bosquet-%s-standalone.jar" (:version project)) 36 | "-H:Name=" "bllm")) 37 | lang:sent:en (do 38 | (shell "mkdir -p lang/en") 39 | (shell (str "wget -O models/lang/en/sentence-detector.bin " 40 | "https://dlcdn.apache.org/opennlp/models/ud-models-1.0/opennlp-en-ud-ewt-sentence-1.0-1.9.3.bin")))}} 41 | -------------------------------------------------------------------------------- /resources/mcp-example/echo.py: -------------------------------------------------------------------------------- 1 | #to run this python script pip install mcp 2 | import asyncio 3 | import json 4 | from mcp.server import Server 5 | from mcp.server.stdio import stdio_server 6 | from mcp.types import Tool, TextContent 7 | 8 | 
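# This example server can be launched as a subprocess by Bosquet's stdio MCP transport.
# A hypothetical Clojure-side config passed to
# bosquet.mcp.stdio-transport/create-stdio-transport might look like:
#   {:command "python" :args ["resources/mcp-example/echo.py"]}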
app = Server("echo-server") 9 | 10 | @app.list_tools() 11 | async def list_tools() -> list[Tool]: 12 | return [ 13 | Tool( 14 | name="echo", 15 | description="Echo back the message you provide", 16 | inputSchema={ 17 | "type": "object", 18 | "properties": { 19 | "message": { 20 | "type": "string", 21 | "description": "The message to echo back" 22 | } 23 | }, 24 | "required": ["message"] 25 | } 26 | ), 27 | Tool( 28 | name="echo_multiple", 29 | description="Echo back multiple arguments as a formatted string", 30 | inputSchema={ 31 | "type": "object", 32 | "properties": { 33 | "arg1": { 34 | "type": "string", 35 | "description": "First argument" 36 | }, 37 | "arg2": { 38 | "type": "string", 39 | "description": "Second argument" 40 | }, 41 | "arg3": { 42 | "type": "string", 43 | "description": "Third argument (optional)" 44 | } 45 | }, 46 | "required": ["arg1", "arg2"] 47 | } 48 | ), 49 | Tool( 50 | name="echo_json", 51 | description="Echo back all arguments as JSON", 52 | inputSchema={ 53 | "type": "object", 54 | "properties": {}, 55 | "additionalProperties": True 56 | } 57 | ) 58 | ] 59 | 60 | @app.call_tool() 61 | async def call_tool(name: str, arguments: dict) -> list[TextContent]: 62 | if name == "echo": 63 | return [TextContent(type="text", text=arguments["message"])] 64 | 65 | elif name == "echo_multiple": 66 | result = f"arg1: {arguments['arg1']}, arg2: {arguments['arg2']}" 67 | if "arg3" in arguments: 68 | result += f", arg3: {arguments['arg3']}" 69 | return [TextContent(type="text", text=result)] 70 | 71 | elif name == "echo_json": 72 | return [TextContent(type="text", text=json.dumps(arguments, indent=2))] 73 | 74 | raise ValueError(f"Unknown tool: {name}") 75 | 76 | async def main(): 77 | async with stdio_server() as (read_stream, write_stream): 78 | await app.run(read_stream, write_stream, app.create_initialization_options()) 79 | 80 | if __name__ == "__main__": 81 | asyncio.run(main()) 82 | -------------------------------------------------------------------------------- /notebook/observability.clj: -------------------------------------------------------------------------------- 1 | (ns observability 2 | (:require 3 | [bosquet.llm.generator :as g] 4 | [bosquet.llm.http :as http])) 5 | 6 | ;; ## Observability through proxy 7 | ;; 8 | ;; 9 | ;; A local proxy logging all interactions between *Bosquet* and LLM service can be a very useful debugging tool. 10 | ;; For that purpose, Bosquet can be configured to work with with [Mitproxy](https://mitmproxy.org/). 11 | ;; 12 | ;; ## Install 13 | ;; 14 | ;; Follow the installation instructions in the *Mitproxy* [documentation](https://docs.mitmproxy.org/stable/overview-installation/). 15 | ;; Once installed you can start web console with `mitmweb`. 16 | ;; 17 | ;; *Mitproxy* webconsole http://127.0.0.1:8081/#/flows 18 | ;; 19 | ;; As Mitproxy starts it will create a `~/.mitproxt` dir containing SSL certificates. 20 | ;; The certificate needs to be added to JVM keystore. 21 | ;; 22 | ;; The following command will add it to *Bosquet* keystore: 23 | ;; 24 | ;; ```bash 25 | ;; bb mitproxy:keystore 26 | ;; ``` 27 | ;; 28 | ;; ## REPL 29 | ;; 30 | ;; When in REPL, this call will set JVM parameters forcing HTTP libs to use a configured proxy. 
31 | 32 | ^{:nextjournal.clerk/visibility {:result :hide}} 33 | #_(http/use-local-proxy) 34 | 35 | ;; This function sets the following JVM properties 36 | ;; ```clojure 37 | ;; (System/setProperty "javax.net.ssl.trustStore" (str (System/getProperty "user.home") "/.bosquet/keystore")) 38 | ;; (System/setProperty "javax.net.ssl.trustStorePassword" password) 39 | ;; (System/setProperty "https.proxyHost" host) 40 | ;; (System/setProperty "https.proxyPort" (str port)) 41 | ;; ``` 42 | ;; After this, the `generate` call will go through Mitproxy 43 | 44 | (g/generate 45 | [[:system ["As a brilliant astronomer, list distances between planets and the Sun" 46 | "in the Solar System. Provide the answer in JSON map where the key is the" 47 | "planet name and the value is the string distance in millions of kilometers." 48 | "{{analysis}}"]] 49 | [:user ["Generate only JSON omit any other prose and explanations."]] 50 | [:assistant (g/llm :mistral-medium 51 | :llm/var-name :distances 52 | :llm/output-format :json 53 | :llm/model-params {:max-tokens 300})] 54 | [:user ["Based on the JSON distances data" 55 | "provide me with​ a) average distance b) max distance c) min distance"]] 56 | [:assistant (g/llm :mistral-small 57 | :llm/var-name :analysis)]]) 58 | 59 | ;; The *Mitproxy* console should show logged calls, where request, response, and latency data can be examined. 60 | ;; 61 | ;; ![Mitproxy](notebook/assets/mitproxy.png) 62 | ;; 63 | ;; ### CLI 64 | ;; 65 | ;; When using Bosquet via command line, proxy can be activated either with defaults: 66 | ;; 67 | ;; ```bash 68 | ;; clojure -M -m bosquet.cli "2+2=" --proxy 69 | ;; ``` 70 | ;; or with custom host and port 71 | ;; ```bash 72 | ;; clojure -M -m bosquet.cli "2+2=" --proxy-host localhost --proxy-port 8080 --keystore-password changeit 73 | ;; ``` 74 | ;; --- 75 | ;; *With many thanks to [Fuck You, Show Me The Prompt](https://hamel.dev/blog/posts/prompt/)* 76 | -------------------------------------------------------------------------------- /test/bosquet/agent/agent_mind_reader_test.clj: -------------------------------------------------------------------------------- 1 | (ns bosquet.agent.agent-mind-reader-test 2 | (:require 3 | [clojure.test :refer [deftest is]] 4 | [bosquet.agent.agent-mind-reader :as mind])) 5 | 6 | (def ^:private thought-search 7 | "Question: Author David Chanoff has collaborated with a U.S. Navy admiral who served as the ambassador to 8 | the United Kingdom under which President? 9 | Thought 1: I need to search David Chanoff, find the U.S. Navy admiral he 10 | collaborated with, then find the President the admiral served under. 11 | Action 1: Search[David Chanoff] 12 | Observation 1: David Chanoff is an American author and journalist who has written or co-written 13 | over 20 books. He is best known for his collaborations with U.S. Navy 14 | Admiral James Stockdale. 15 | Thought 2: U.S. Navy Admiral James Stockdale served as the ambassador to the United Kingdom. I need to search James Stockdale and find which President he served under. 16 | Action 2: Search[James Stockdale] 17 | Observation 2: ...") 18 | 19 | (def ^:private thought-lookup 20 | "Observation 1: Milhouse Mussolini Van Houten is a recurring character in the Fox animated 21 | television series The Simpsons voiced by Pamela Hayden and created by Matt Groening. 22 | Thought 2: The paragraph does not tell who Milhouse is named after, maybe I can look up \"named after\". 23 | Action 2: Lookup[named after] 24 | Observation 2: (Result 1 / 1) Milhouse was named after U.S. 
president Richard Nixon, whose middle name was Milhous.") 25 | 26 | (deftest find-first-action-test 27 | (is (= {:action :search 28 | :parameters "David Chanoff" 29 | :thought 30 | "Thought 1: I need to search David Chanoff, find the U.S. Navy admiral he 31 | collaborated with, then find the President the admiral served under. 32 | Action 1: Search[David Chanoff]"} 33 | (mind/find-action 1 thought-search))) 34 | (is (= {:action :search 35 | :parameters "James Stockdale" 36 | :thought 37 | "Thought 2: U.S. Navy Admiral James Stockdale served as the ambassador to the United Kingdom. I need to search James Stockdale and find which President he served under. 38 | Action 2: Search[James Stockdale]"} 39 | (mind/find-action 2 thought-search))) 40 | (is (= {:action :lookup 41 | :parameters "named after" 42 | :thought 43 | "Thought 2: The paragraph does not tell who Milhouse is named after, maybe I can look up \"named after\". 44 | Action 2: Lookup[named after]"} 45 | (mind/find-action 2 thought-lookup)))) 46 | 47 | (deftest sentence-splitter 48 | (is (= ["Sentence one." "Sentence A.B. two?" "Last one!"] 49 | (mind/split-sentences "Sentence one.\nSentence A.B. two? Last one!")))) 50 | 51 | (deftest content-lookup-index 52 | (is (= [[0 true "This sentence one."] 53 | [1 true "This sentence A.B. two?"] 54 | [2 false "Almost the last sentence."] 55 | [3 true "The A.B. is good in this sentence!"]] 56 | (mind/lookup-index 57 | "this Sentence" 58 | "This sentence one.\nThis sentence A.B. two? Almost the last sentence. The A.B. is good in this sentence!")))) 59 | -------------------------------------------------------------------------------- /test/bosquet/llm/tools_test.clj: -------------------------------------------------------------------------------- 1 | (ns bosquet.llm.tools-test 2 | (:require 3 | [bosquet.llm.tools :refer [apply-tools tool->function]] 4 | [bosquet.llm.wkk :as wkk] 5 | [bosquet.tool.math :refer [add]] 6 | [bosquet.tool.weather :refer [get-current-weather]] 7 | [cheshire.core :as json] 8 | [clojure.test :refer [deftest is]] 9 | [jsonista.core :as j])) 10 | 11 | (deftest test-tool->function 12 | (let [weather-spec (tool->function #'get-current-weather)] 13 | (is (= "function" (:type weather-spec))) 14 | (is (= "get-current-weather" (get-in weather-spec [:function :name]))) 15 | (is (= "bosquet.tool.weather" (get-in weather-spec [:function :ns]))) 16 | (is (= "Get the current weather in a given location" (get-in weather-spec [:function :description]))) 17 | (is (= "object" (get-in weather-spec [:function :parameters :type]))) 18 | (is (contains? (get-in weather-spec [:function :parameters :properties]) :location)) 19 | (is (= "string" (get-in weather-spec [:function :parameters :properties :location :type]))) 20 | (is (= "The city, e.g. 
Vilnius" (get-in weather-spec [:function :parameters :properties :location :description]))) 21 | (is (= ["location"] (get-in weather-spec [:function :parameters :required]))))) 22 | 23 | (deftest test-apply-tools 24 | (let [available-tools [#'get-current-weather #'add] 25 | mock-openai-result 26 | {:choices 27 | [{:message 28 | {:role "assistant" 29 | :content nil 30 | :tool_calls [{:id "call_abc123" 31 | :type "function" 32 | :function 33 | {:name "get-current-weather" 34 | :arguments (json/generate-string {:location "Vilnius"})}}]}}]} 35 | 36 | initial-params 37 | {:messages [{:role "user" :content "What's the weather in Vilnius?"}] 38 | :model "gpt-4" 39 | wkk/tools "some-tool-config-value"} 40 | 41 | generator-called-with (atom nil) 42 | mock-generator (fn [params-for-next-call] 43 | (reset! generator-called-with params-for-next-call) 44 | {:final-llm-response "The weather in Vilnius is 24C"})] 45 | 46 | (apply-tools mock-openai-result wkk/openai initial-params available-tools mock-generator) 47 | 48 | (is (some? @generator-called-with) "Generator should have been called") 49 | (let [messages (get @generator-called-with :messages)] 50 | (is (= 3 (count messages)) "Should have user, assistant, and tool messages") 51 | (is (= {:role "user" :content "What's the weather in Vilnius?"} (first messages))) 52 | (is (= "assistant" (:role (second messages)))) 53 | (is (some? (get-in (second messages) [:tool_calls 0 :function :name]))) 54 | (is (= "tool" (:role (nth messages 2)))) 55 | (is (= "call_abc123" (:tool_call_id (nth messages 2)))) 56 | (is (= {:temperature "24" :unit "celcius" :location "Vilnius"} 57 | (-> messages (nth 2) :content (j/read-value j/keyword-keys-object-mapper)))) 58 | (is (nil? (get @generator-called-with wkk/tools)) "tools-key should be dissoc'd")))) 59 | -------------------------------------------------------------------------------- /src/bosquet/memory/long_term_memory.clj: -------------------------------------------------------------------------------- 1 | (ns bosquet.memory.long-term-memory 2 | (:gen-class) 3 | (:require 4 | [bosquet.llm.wkk :as wkk] 5 | [bosquet.memory.retrieval :as r] 6 | [clojure.core.async :refer [> memories 50 | object-limit (take-last object-limit) 51 | token-limit (r/take-while-tokens 52 | (merge {wkk/model :gpt-3.5-turbo 53 | wkk/service wkk/openai} 54 | params)))) 55 | 56 | (defn ->cue-memory 57 | [storage llm] 58 | (fn [{limit r/memory-objects-limit :or {limit 3} :as opts} cue] 59 | (let [encode-fn (embed-fn llm)] 60 | (retrieve-in-sequnce 61 | opts 62 | (map :payload (.search storage 63 | (:embedding (encode-fn opts cue)) 64 | limit)))))) 65 | 66 | (defn ->remember 67 | [storage llm] 68 | (fn [opts observation] 69 | (store-embeds storage llm opts observation))) 70 | 71 | #_(deftype LongTermMemory 72 | [storage llm] 73 | mem/Memory 74 | 75 | (forget 76 | [_this {:keys [collection-name]}] 77 | (.delete storage collection-name)) 78 | 79 | (remember 80 | [_this observation opts] 81 | (store-embeds storage llm opts observation)) 82 | 83 | (free-recall [_this _cueue _params]) 84 | 85 | (sequential-recall [_this _params]) 86 | 87 | (cue-recall 88 | [_this cue {:keys [limit] :or {limit 3} :as opts}] 89 | (let [encode-fn (embed-fn llm)] 90 | (retrieve-in-sequnce 91 | opts 92 | (.search storage 93 | (:embedding (encode-fn opts cue)) 94 | limit)))) 95 | 96 | (volume [_this _opts])) 97 | -------------------------------------------------------------------------------- /src/bosquet/llm/cohere.clj: 
-------------------------------------------------------------------------------- 1 | (ns bosquet.llm.cohere 2 | (:require 3 | [bosquet.env :as env] 4 | [bosquet.llm.schema :as schema] 5 | [bosquet.llm.wkk :as wkk] 6 | [bosquet.utils :as u] 7 | [cohere.client :as client])) 8 | 9 | (defn- set-api-key [api-key] 10 | (when api-key 11 | (System/setProperty "cohere.api.key" api-key))) 12 | 13 | (defn- props->cohere 14 | "Convert general LLM model properties to Cohere specific ones." 15 | [{:keys [n stop] :as props}] 16 | (u/snake-case 17 | (u/mergex 18 | (dissoc props :n :stop) 19 | {:num_generations n} 20 | {:stop_sequences stop}))) 21 | 22 | (defn usage->canonical 23 | [{:keys [input_tokens output_tokens]}] 24 | {schema/usage-in-count input_tokens 25 | schema/usage-out-count output_tokens 26 | schema/usage-total-count (+ output_tokens input_tokens)}) 27 | 28 | (defn complete 29 | ([{api-key :api-key url :api-endpoint} params] 30 | (set-api-key api-key) 31 | (u/log-call url params) 32 | (let [{{usage :billed_units} :meta generations :generations} 33 | (client/generate (props->cohere params))] 34 | {wkk/generation-type :completion 35 | wkk/content {:completion (-> generations first :text)} 36 | wkk/usage (usage->canonical usage)})) 37 | ([params] 38 | (complete (wkk/cohere env/config) params))) 39 | 40 | (defn chatml->cohere 41 | "Transform ChatML messages to the message data shape required by Cohere API" 42 | [messages] 43 | (mapv 44 | (fn [{:keys [role content]}] 45 | {:user_name role :text content}) 46 | messages)) 47 | 48 | (defn chat 49 | ([params] (chat (wkk/cohere env/config) params)) 50 | ([{api-key :api-key url :api-endpoint} {messages :messages :as params}] 51 | (set-api-key api-key) 52 | (u/log-call url params) 53 | (let [params (dissoc params :messages) 54 | messages (chatml->cohere messages) 55 | message (-> messages last :text) 56 | history (butlast messages) 57 | {{usage :billed_units} :meta text :text} 58 | (client/chat 59 | (props->cohere (assoc params 60 | :message message 61 | :chat_history history)))] 62 | {wkk/generation-type :chat 63 | wkk/content {:role :assistant :content text} 64 | wkk/usage (usage->canonical usage)}))) 65 | 66 | (comment 67 | (def messages [{:role :user :content "Let's do some calculations!"} 68 | {:role :chatbot :content "Certainly, I am happy to calculate"} 69 | {:role :user :content "4+4="}]) 70 | 71 | (client/chat 72 | :chat_history (chatml->cohere [{:role :user :content "Let's do some calculations!"} 73 | {:role :chatbot :content "Certainly, I am happy to calculate"}]) 74 | :message "2+2=") 75 | 76 | (client/generate (props->cohere 77 | {:model "command" 78 | :prompt "Today is a" 79 | :n 1 80 | :stop-sequences ["\n"] 81 | :temperature 0.2})) 82 | (complete 83 | {:prompt "A party is about to begin." 
84 | :model "command" 85 | :n 1 86 | :stop-sequences ["\n"] 87 | :temperature 0.2})) 88 | -------------------------------------------------------------------------------- /src/bosquet/utils.clj: -------------------------------------------------------------------------------- 1 | (ns bosquet.utils 2 | (:require 3 | [clojure.edn :as edn] 4 | [clojure.java.io :as io] 5 | [clojure.string :as string] 6 | [clojure.walk :refer [postwalk]] 7 | [jsonista.core :as j] 8 | [me.flowthing.pp :as pp] 9 | [taoensso.timbre :as timbre] 10 | [net.modulolotus.truegrit.circuit-breaker :as cb]) 11 | (:import 12 | [java.util UUID])) 13 | 14 | (def rest-service-cb (cb/circuit-breaker "shared-rest-service" 15 | {:failure-rate-threshold 30 16 | :minimum-number-of-calls 2})) 17 | 18 | (defn uuid [] 19 | (UUID/randomUUID)) 20 | 21 | (defn pp-str 22 | [x] 23 | (with-out-str (pp/pprint x))) 24 | 25 | (defn pp 26 | [x] 27 | (pp/pprint x)) 28 | 29 | (defn safe-subs 30 | "Substring with safety of going over the max length" 31 | ([s start end] 32 | (subs s start (min end (count s)))) 33 | ([s start] 34 | (subs s start))) 35 | 36 | (defn concatv 37 | "Non-lazily concat any number of collections, returning a persistent vector." 38 | ([] 39 | []) 40 | ([x] 41 | (vec x)) 42 | ([x & ys] 43 | (into (vec x) cat ys))) 44 | 45 | (defn join-lines [& lines] 46 | (apply str (interpose "\n" lines))) 47 | 48 | (defn join-coll [content] 49 | (if (coll? content) (string/join "\n" content) content)) 50 | 51 | (defn read-json 52 | "Read JSON from a string keywordizing keys" 53 | [s] 54 | (j/read-value s j/keyword-keys-object-mapper)) 55 | 56 | (defn write-json 57 | "Write JSON to a string" 58 | [s] 59 | (j/write-value-as-string s)) 60 | 61 | (defn flattenx 62 | "Flatten a nested collection" 63 | [coll] 64 | (remove nil? (flatten coll))) 65 | 66 | (defn mergex 67 | "Merge maps filtering nil values" 68 | [& maps] 69 | (apply 70 | merge 71 | (map (fn [a-map] 72 | (reduce-kv 73 | (fn [m k v] (if (nil? v) m (assoc m k v))) 74 | {} 75 | a-map)) 76 | maps))) 77 | 78 | (defn kebab->snake [s] 79 | (string/replace s #"-" "_")) 80 | 81 | (defn camel->snake [s] 82 | (string/replace s #"([a-z0-9])([A-Z])" "$1_$2")) 83 | 84 | (defn ->snake_case_keyword [k] 85 | (-> k 86 | name 87 | kebab->snake 88 | camel->snake 89 | string/lower-case 90 | keyword)) 91 | 92 | ;; Taken from camel-snake-kebab.extras 93 | ;; https://clj-commons.org/camel-snake-kebab/ 94 | ;; conflicts with clj-commons/clj-yaml {:mvn/version "1.0.27"} 95 | (defn transform-keys 96 | "Recursively transforms all map keys in coll with t." 97 | [t coll] 98 | (letfn [(transform [[k v]] [(t k) v])] 99 | (postwalk (fn [x] (if (map? x) (into {} (map transform x)) x)) coll))) 100 | 101 | (defn snake-case 102 | "Snake case keys from `:max-tokens` to `:max_tokens`" 103 | [m] 104 | (transform-keys ->snake_case_keyword m)) 105 | 106 | (defn log-call 107 | [url params] 108 | (timbre/infof "💬 Calling %s with:" url) 109 | (doseq [[k v] (dissoc params :messages)] 110 | (timbre/infof " %-15s%s" k v))) 111 | 112 | (defn now [] 113 | (inst-ms (java.time.Instant/now))) 114 | 115 | (defn read-edn-file [file-path] 116 | (with-open [reader (io/reader file-path)] 117 | (edn/read (java.io.PushbackReader. 
reader)))) 118 | -------------------------------------------------------------------------------- /deps.edn: -------------------------------------------------------------------------------- 1 | {:paths ["src" "resources"] 2 | :deps {org.clojure/clojure {:mvn/version "1.12.3"} 3 | io.github.zmedelis/hfds-clj {:mvn/version "2023.12.11"} 4 | metosin/malli {:mvn/version "0.20.0-alpha2"} 5 | com.wsscode/pathom3 {:mvn/version "2025.01.16-alpha"} 6 | net.clojars.wkok/openai-clojure {:mvn/version "0.23.0"} 7 | com.taoensso/timbre {:mvn/version "6.8.0"} 8 | com.knuddels/jtokkit {:mvn/version "1.1.0"} 9 | clj-http/clj-http {:mvn/version "3.13.1"} 10 | metosin/jsonista {:mvn/version "0.3.13"} 11 | org.clojars.danielsz/cohere {:mvn/version "1.0.0"} 12 | aero/aero {:mvn/version "1.1.6"} 13 | org.clojure/core.cache {:mvn/version "1.1.234"} 14 | org.apache.commons/commons-text {:mvn/version "1.14.0"} 15 | me.flowthing/pp {:mvn/version "2024-11-13.77"} 16 | org.apache.tika/tika-core {:mvn/version "3.2.3"} 17 | org.apache.tika/tika-parser-html-commons {:mvn/version "2.9.4"} 18 | org.apache.tika/tika-parsers-standard-package {:mvn/version "3.2.3"} 19 | org.apache.opennlp/opennlp-tools {:mvn/version "2.5.6.1"} 20 | org.clojure/tools.cli {:mvn/version "1.2.245"} 21 | com.fzakaria/slf4j-timbre {:mvn/version "0.4.1"} 22 | org.clojure/core.async {:mvn/version "1.8.741"} 23 | selmer/selmer {:mvn/version "1.12.65"} 24 | net.modulolotus/truegrit {:mvn/version "2.3.35"} 25 | aysylu/loom {:mvn/version "1.0.2"}} 26 | :aliases {:dev {:extra-paths ["dev" "notebook"] 27 | :extra-deps {io.github.nextjournal/clerk {:mvn/version "0.18.1150"} 28 | djblue/portal {:mvn/version "0.61.0"} 29 | com.github.clj-easy/graal-build-time {:mvn/version "1.0.5"}}} 30 | :neil {:project {:name io.github.zmedelis/bosquet 31 | :version "2025.10.15"}} 32 | :rebel {:extra-deps {com.bhauman/rebel-readline {:mvn/version "0.1.5"}} 33 | :main-opts ["-m" "rebel-readline.main"]} 34 | :outdated {:deps {com.github.liquidz/antq {:mvn/version "RELEASE"}} 35 | :main-opts ["-m" "antq.core" "--upgrade"]} 36 | :test {:extra-deps {nubank/matcher-combinators {:mvn/version "3.9.2"}}} 37 | :kaocha {:main-opts ["-m" "kaocha.runner"] 38 | :extra-paths ["test"] 39 | :extra-deps {lambdaisland/kaocha {:mvn/version "1.91.1392"}}} 40 | :build {:deps {io.github.clojure/tools.build {:git/tag "v0.10.10" 41 | :git/sha "deedd62"} 42 | slipset/deps-deploy {:mvn/version "0.2.2"}} 43 | :ns-default build} 44 | :ns-default build}} 45 | -------------------------------------------------------------------------------- /src/bosquet/llm/tools.clj: -------------------------------------------------------------------------------- 1 | (ns bosquet.llm.tools 2 | (:require 3 | [bosquet.llm.wkk :as wkk] 4 | [cheshire.core :as json] 5 | [taoensso.timbre :as timbre])) 6 | 7 | (defn tool->function [tool-var] 8 | (let [fn-meta (meta tool-var) 9 | args (into {} 10 | (map 11 | (fn [arg] 12 | (let [arg-meta (meta arg)] 13 | [(keyword arg) {:type (:type arg-meta) 14 | :description (:desc arg-meta)}])) 15 | (first (:arglists (meta tool-var)))))] 16 | {:type "function" 17 | :function {:name (name (:name fn-meta)) 18 | :ns (str (:ns fn-meta)) 19 | :description (:desc fn-meta) 20 | :parameters {:type "object" 21 | :properties args 22 | :required (map name (first (:arglists fn-meta)))}}})) 23 | 24 | (defn- select-tool-by-name [tools function] 25 | (first (filter #(= (:name function) (name (:name (meta %)))) tools))) 26 | 27 | (defn- parse-arguments [model-engine result] 28 | (let [result (condp = 
model-engine 29 | wkk/ollama (:message result) 30 | wkk/openai (-> result :choices first :message))] 31 | (update result :tool_calls 32 | #(map (fn [tool-call] 33 | (update-in tool-call [:function :arguments] 34 | (fn [arguments] 35 | (if (string? arguments) 36 | (json/parse-string arguments true) 37 | arguments)))) %)))) 38 | 39 | (defn- apply-fn [tool function] 40 | (let [args (first (:arglists (meta tool)))] 41 | (apply tool (map #(get (:arguments function) (keyword %)) args)))) 42 | 43 | (defn- apply-tool 44 | [tools {:keys [id function]}] 45 | (let [tool (select-tool-by-name tools function)] 46 | {:id id 47 | :function function 48 | :result (when tool 49 | (apply-fn tool function))})) 50 | 51 | (defn- tool-result-formatter 52 | [model-engine result tool-results] 53 | (let [tool-results (map #(condp = model-engine 54 | wkk/ollama {:role "tool" :content (json/generate-string (:result %))} 55 | wkk/openai {:role "tool" :content (json/generate-string (:result %)) :tool_call_id (:id %)}) tool-results)] 56 | (condp = model-engine 57 | wkk/ollama tool-results 58 | wkk/openai (concat [{:role "assistant" :tool_calls (-> result :choices first :message :tool_calls)}] tool-results)))) 59 | 60 | (defn apply-tools 61 | [result engine params tools generator] 62 | (timbre/infof "Applying tools %d for engine %s" (count tools) engine) 63 | (if (not-empty tools) 64 | (let [parsed-result (parse-arguments engine result) 65 | fn-results (->> (:tool_calls parsed-result) 66 | (map #(apply-tool tools %))) 67 | tool-messages (tool-result-formatter engine result fn-results) 68 | messages (concat (vec (:messages params)) tool-messages)] 69 | (timbre/debug messages) 70 | (if (seq? tool-messages) 71 | (generator (-> params 72 | (dissoc wkk/tools) 73 | (assoc :messages messages))) 74 | result)) 75 | result)) 76 | 77 | (comment 78 | ;; Example tool registration 79 | (require '[bosquet.tool.math :refer [add]] 80 | '[bosquet.tool.weather :refer [get-current-weather]]) 81 | (tool->function #'get-current-weather) 82 | (tool->function #'add)) 83 | -------------------------------------------------------------------------------- /src/bosquet/llm/ollama.clj: -------------------------------------------------------------------------------- 1 | (ns bosquet.llm.ollama 2 | (:require 3 | [bosquet.env :as env] 4 | [bosquet.llm.http :as http] 5 | [bosquet.llm.oai-shaped-llm :as oai] 6 | [bosquet.llm.wkk :as wkk] 7 | [bosquet.llm.tools :as tools])) 8 | 9 | (defn ->completion 10 | [{:keys [response message prompt_eval_count eval_count] 11 | ;; ollama returns 0 for prompt eval if the prompt was cached 12 | :or {prompt_eval_count 0 eval_count 0}}] 13 | (assoc 14 | (cond 15 | message {wkk/generation-type :chat 16 | wkk/content (oai/chatml->bosquet message)} 17 | response {wkk/generation-type :completion 18 | wkk/content response}) 19 | wkk/usage {:prompt prompt_eval_count 20 | :completion eval_count 21 | :total (+ eval_count prompt_eval_count)})) 22 | 23 | (defn- chat-fn [{:keys [api-endpoint]}] 24 | (partial http/resilient-post (str api-endpoint "/chat"))) 25 | 26 | (defn- completion-fn [{:keys [api-endpoint]}] 27 | (partial http/resilient-post (str api-endpoint "/generate"))) 28 | 29 | (defn- embedding-fn [{:keys [api-endpoint]}] 30 | (partial http/resilient-post (str api-endpoint "/embeddings"))) 31 | 32 | (defn- generate 33 | [default-params params gen-fn] 34 | (let [tools (map tools/tool->function (wkk/tools params)) 35 | tool-defs (wkk/tools params) 36 | params (-> params (assoc :stream false) (oai/prep-params default-params) 
(assoc :tools tools))] 37 | (-> (gen-fn params) 38 | (tools/apply-tools wkk/ollama params tool-defs gen-fn) 39 | ->completion))) 40 | 41 | (defn chat 42 | [service-cfg params] 43 | (generate service-cfg params (chat-fn service-cfg))) 44 | 45 | (defn complete 46 | [service-cfg params] 47 | (generate service-cfg (dissoc params wkk/tools) (completion-fn service-cfg))) 48 | 49 | (defn create-embedding 50 | "Works as the equivalent of this: 51 | 52 | ``` 53 | curl http://localhost:11434/api/embeddings -d '{ 54 | \"model\": \"all-minilm\", 55 | \"prompt\": \"Here is an article about llamas...\"}' 56 | ``` 57 | 58 | https://github.com/ollama/ollama/blob/main/docs/api.md#generate-embeddings" 59 | [service-cfg {:keys [model content] 60 | :or {content identity}} payload] 61 | ((embedding-fn service-cfg) 62 | {:model model 63 | :prompt (content payload)})) 64 | 65 | (comment 66 | (create-embedding (env/config :ollama) {:model :llama3.2} 67 | "Here is an article about llamas...") 68 | 69 | (create-embedding (env/config :ollama) {:model :all-minilm 70 | :content :text} 71 | {:text "Here is an article about llamas..." 72 | :score 100}) 73 | (complete {:api-endpoint "http://localhost:11434/api"} 74 | {:model "llama3.2:3b" 75 | :prompt "why is the sky blue?" 76 | wkk/tools [#'tools/get-current-weather]}) 77 | 78 | (chat {:api-endpoint "http://localhost:11434/api"} 79 | {:model "llama3.2:3b" 80 | :messages [{:role :user :content "What is the weather in san francisco?"}] 81 | wkk/tools [#'tools/get-current-weather]}) 82 | 83 | (chat {:api-endpoint "http://localhost:11434/api"} 84 | {:model "llama3.2:3b" 85 | :messages [{:role :user :content "What is 2 plus 2 minus 2"}] 86 | wkk/tools [#'tools/add #'tools/sub]}) 87 | (complete {:api-endpoint "http://localhost:11434/api"} 88 | {:model "llama3.2:3b" 89 | :prompt "The current weather in san francisco is" 90 | wkk/tools [#'tools/get-current-weather]})) 91 | -------------------------------------------------------------------------------- /src/bosquet/db/qdrant.clj: -------------------------------------------------------------------------------- 1 | (ns bosquet.db.qdrant 2 | (:require 3 | [bosquet.db.vector-db :as vdb] 4 | [bosquet.env :as env] 5 | [bosquet.utils :as u] 6 | [hato.client :as hc] 7 | [jsonista.core :as j] 8 | [taoensso.timbre :as log])) 9 | 10 | (defn- collections-path 11 | "URL endpoint for collection with the `name` operations." 12 | [{:keys [api-endpoint]} {:keys [collection-name]}] 13 | (str api-endpoint "/collections/" collection-name)) 14 | 15 | (defn- points-path 16 | "URL endpoint for collection with the `points` operations." 17 | [{:keys [api-endpoint]} {:keys [collection-name]}] 18 | (format "%s/collections/%s/points?wait=true" 19 | api-endpoint collection-name)) 20 | 21 | (defn- search-path 22 | "URL endpoint for collection with the `points` operations." 23 | [{:keys [api-endpoint]} {:keys [collection-name]}] 24 | (format "%s/collections/%s/points/search" api-endpoint collection-name)) 25 | 26 | (defn collection-info 27 | [opts params] 28 | (let [{:keys [body status]} (hc/get (collections-path opts params) 29 | {:throw-exceptions? 
false})] 30 | (condp = status 31 | 200 (-> body (j/read-value j/keyword-keys-object-mapper) :result) 32 | 404 nil))) 33 | 34 | (defn create-collection 35 | "Create a collection with `name` and `config`" 36 | [opts params] 37 | (when-not (collection-info opts params) 38 | (hc/put (collections-path opts params) 39 | {:content-type :json 40 | :body (j/write-value-as-string 41 | {:vectors (u/snake-case 42 | (merge 43 | (dissoc opts :api-endpoint) 44 | params))})}))) 45 | 46 | (defn delete-collection 47 | [opts params] 48 | (hc/delete opts params)) 49 | 50 | (defn add-docs 51 | "Add docs to the `collection-name` if collection does not exist, create it." 52 | [opts params data] 53 | (when (seq data) 54 | (let [points {:points (mapv (fn [{:keys [payload embedding]}] 55 | {:id (u/uuid) 56 | :vector embedding 57 | :payload payload}) 58 | data)}] 59 | (hc/put (points-path opts params) 60 | {:content-type :json 61 | :body (j/write-value-as-string points)})))) 62 | 63 | (defn search 64 | ([opts params embeds-vector] 65 | (search opts params embeds-vector nil)) 66 | ([opts params embeds-vector {:keys [limit] 67 | :or {limit 3}}] 68 | (try 69 | (let [res (-> (search-path opts params) 70 | (hc/post 71 | {:content-type :json 72 | :body (j/write-value-as-string 73 | {:vector embeds-vector 74 | :top limit 75 | :with_payload true})}) 76 | :body u/read-json :result)] 77 | (map #(select-keys % [:id :score :payload]) res)) 78 | (catch Exception e 79 | (log/error e))))) 80 | 81 | (deftype Qdrant 82 | [params] 83 | vdb/VectorDB 84 | 85 | (create [_this] 86 | (create-collection (env/val :qdrant) params)) 87 | 88 | (delete [_this] 89 | (delete-collection (env/val :qdrant) params)) 90 | 91 | (add [_this docs] 92 | (add-docs (env/val :qdrant) params docs)) 93 | 94 | (search [_this embeddings search-opts] 95 | (search (env/val :qdrant) params embeddings search-opts))) 96 | -------------------------------------------------------------------------------- /notebook/examples/math_generate_code.clj: -------------------------------------------------------------------------------- 1 | (ns math-generate-code 2 | (:require 3 | [bosquet.llm.generator :as g] 4 | [bosquet.llm.wkk :as wkk] 5 | [bosquet.utils :as u] 6 | [nextjournal.clerk :as c])) 7 | 8 | ;; ## Code generation for math calculations 9 | ;; 10 | ;; > Based on https://github.com/outlines-dev/outlines/blob/main/examples/math_generate_code.py 11 | ;; 12 | ;; This example shows how to construct a few-shot learning prompt to generate code for math calculations. 13 | ;; This relies on Selmer for loop construct to iterate over the examples and Bosquet gen tag to generate the code. 14 | 15 | ;; ### Defining few-shot examples 16 | ;; 17 | ;; Examples are defined as collections containing example data points. 18 | ;; 19 | (def examples [{:question "What is 37593 * 67?" 20 | :code "(* 37593 67)"} 21 | {:question (u/join-lines 22 | "Janet's ducks lay 16 eggs per day." 23 | "She eats three for breakfast every morning and bakes muffins for her friends every day with four." 24 | "She sells the remainder at the farmers' market daily for $2 per fresh duck egg." 25 | "How much in dollars does she make every day at the farmers' market?") 26 | :code "(* (- 16 3 4) 2)"} 27 | {:question "A robe takes 2 bolts of blue fiber and half that much white fiber. How many bolts in total does it take?" 28 | :code "(+ 2 (/ 2 2)"}]) 29 | 30 | ;; This is used in the Selmer template iterating over the examples. See `:calc` in `prompt` map below. 
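;; As a quick sanity check, the few-shot block can be rendered on its own with Selmer
;; (a minimal sketch in a rich comment; it assumes Selmer is on the classpath, which holds
;; here since Bosquet's templating is built on it):
(comment
  (require '[selmer.parser :as selmer])
  ;; render the same for-loop fragment used in the `:calc` template below over the examples above
  (selmer/render
   (u/join-lines
    "{% for example in examples %}"
    "QUESTION: {{example.question}}"
    "CODE: {{example.code}}"
    "{% endfor %}")
   {:examples examples}))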
31 | ;; 32 | ;; ### Defining the prompt 33 | ;; 34 | ;; Few shot learning-based prompt needs to list the examples followed by the request to answer the 35 | ;; question. The bellow prompt is constructed using a separate few-shot section and calc section 36 | ;; that constructs the request to generate text in the following 'CODE:'. 37 | 38 | (def prompt {:calc ["{% for example in examples %}" 39 | "QUESTION: {{example.question}}" 40 | "CODE: {{example.code}}" 41 | "{% endfor %}" 42 | "" 43 | "QUESTION: {{question}}" 44 | "CODE: {{answer}}"] 45 | :answer (g/llm :openai wkk/model-params {:model :gpt-4})}) 46 | 47 | ;; 48 | ;; Let's have two questions to generate code for. 49 | ;; 50 | 51 | ;; #### Question 1 52 | 53 | (def question1 54 | (u/join-lines 55 | "Carla is downloading a 200 GB file. She can download 2 GB/minute, but 40% of the way" 56 | "through the download, the download fails." 57 | "Then Carla has to restart the download from the beginning. How long did it take" 58 | "her to download the file in minutes?")) 59 | 60 | ^{:nextjournal.clerk/visibility :fold} 61 | (let [{{answer :answer} g/completions} 62 | (g/generate prompt {:examples examples :question question1})] 63 | (c/html 64 | [:div 65 | [:div "Code:" [:pre answer]] 66 | [:div "Eval:" [:pre (-> answer read-string eval)]]])) 67 | 68 | ;; #### Question 2 answer 69 | 70 | (def question2 71 | (u/join-lines 72 | "Janet’s ducks lay 16 eggs per day. She eats three for breakfast every morning and" 73 | "bakes muffins for her friends every day with four." 74 | "She sells the remainder for $2 per egg. How much does she make every day?")) 75 | 76 | ^{:nextjournal.clerk/visibility :fold} 77 | (let [{{answer :answer} g/completions} 78 | (g/generate prompt {:examples examples :question question2})] 79 | (c/html [:div 80 | [:div "Code:" [:pre answer]] 81 | [:div "Eval:" [:pre (-> answer read-string eval)]]])) 82 | -------------------------------------------------------------------------------- /src/bosquet/mcp/core.clj: -------------------------------------------------------------------------------- 1 | (ns bosquet.mcp.core 2 | (:require [clojure.string :as str] 3 | [taoensso.timbre :as timbre] 4 | [bosquet.mcp.client :as client])) 5 | 6 | (def ^:dynamic *transports* (atom {})) 7 | (def ^:dynamic *tool-vars* (atom [])) 8 | 9 | (defn create-tool-fn 10 | "Create a Bosquet-compatible tool function" 11 | [transport tool] 12 | (let [tool-name (symbol (:name tool)) 13 | input-schema (:inputSchema tool) 14 | properties (:properties input-schema {}) 15 | required (map keyword (:required input-schema [])) 16 | param-names (vec (concat required 17 | (remove (set required) (keys properties)))) 18 | tool-fn (fn [& args] 19 | (timbre/info "Calling with args:" args) 20 | ;; Convert keywords back to strings for MCP 21 | (let [arguments (zipmap (map name param-names) args)] 22 | (timbre/info "Mapped to:" arguments) 23 | (let [result (client/call-tool transport (:name tool) arguments)] 24 | (if (sequential? result) 25 | (clojure.string/join " " (map #(get % "text" "") result)) 26 | (str result)))))] 27 | 28 | (intern 'bosquet.mcp.tools 29 | tool-name 30 | (with-meta tool-fn 31 | {:doc (:description tool) 32 | :desc (:description tool) 33 | :arglists (list (vec (map (fn [pname] 34 | (let [pdef (get properties pname)] 35 | (with-meta (symbol (name pname)) 36 | {:type (get pdef :type "string") 37 | :desc (get pdef :description "")}))) 38 | param-names)))})) 39 | 40 | (ns-resolve 'bosquet.mcp.tools tool-name))) 41 | 42 | (defn initialize-mcp-servers! 
43 | "Initialize MCP servers - process spawning is handled automatically" 44 | [configs] 45 | (timbre/info "Initializing" (count configs) "MCP servers") 46 | (reset! *transports* {}) 47 | (reset! *tool-vars* []) 48 | 49 | (doseq [[sym _] (ns-publics 'bosquet.mcp.tools)] 50 | (ns-unmap 'bosquet.mcp.tools sym)) 51 | 52 | (doseq [config configs] 53 | (try 54 | (let [name (:name config) 55 | transport (client/create-transport config)] 56 | 57 | (timbre/info "Connecting to" name "via" (or (:type config) :stdio)) 58 | 59 | (client/initialize transport) 60 | (swap! *transports* assoc name transport) 61 | 62 | (doseq [tool (client/list-tools transport)] 63 | (let [tool-var (create-tool-fn transport tool)] 64 | (swap! *tool-vars* conj tool-var) 65 | (timbre/info " →" (:name tool)))) 66 | 67 | (timbre/info "Ready:" name)) 68 | 69 | (catch Exception e 70 | (timbre/error e "Failed:" (:name config))))) 71 | 72 | (timbre/info "All servers ready")) 73 | 74 | (defn get-tool-vars 75 | "Helper to get all MCP tool vars for use in wkk/tools" 76 | [] 77 | @*tool-vars*) 78 | 79 | (defn shutdown-mcp! 80 | "Shutdown all MCP connections (kills processes for stdio)" 81 | [] 82 | (doseq [[name transport] @*transports*] 83 | (client/shutdown transport) 84 | (timbre/info "Stopped" name)) 85 | (reset! *transports* {}) 86 | (reset! *tool-vars* [])) 87 | 88 | (comment 89 | (require '[bosquet.mcp.tools :as mcp-tools]) 90 | (initialize-mcp-servers! 91 | [{:name "echo-server" 92 | :type :stdio 93 | :command "python3" 94 | :args ["resources/mcp-example/echo.py"]}]) 95 | (mcp-tools/echo_multiple "hello world" "this is rohit") 96 | (mcp-tools/echo "Hello world") 97 | (shutdown-mcp!)) 98 | -------------------------------------------------------------------------------- /src/bosquet/llm/oai_shaped_llm.clj: -------------------------------------------------------------------------------- 1 | (ns bosquet.llm.oai-shaped-llm 2 | (:require 3 | [bosquet.llm.http :as http] 4 | [bosquet.llm.wkk :as wkk] 5 | [bosquet.utils :as u] 6 | [clojure.set :as set])) 7 | 8 | (defn prep-params 9 | "Shape `params` into the LLM API service required structure. 10 | Remove or move `Bosquet` parameters. 11 | 12 | If `params` has no `model` specified model in `default-parms` will be used." 13 | ([params] (prep-params params nil)) 14 | ([params defaults] 15 | (-> params 16 | (u/mergex defaults params) 17 | (dissoc wkk/model-params) 18 | (dissoc wkk/tools) 19 | (merge (wkk/model-params params))))) 20 | 21 | ;; ## ChatML 22 | 23 | (def role 24 | :role) 25 | 26 | (def content 27 | :content) 28 | 29 | (def system 30 | "Key to reference `system` role in ChatML format" 31 | :system) 32 | 33 | (def user 34 | "Key to reference `user` role in ChatML format" 35 | :user) 36 | 37 | (def assistant 38 | "Key to reference `assistant` role in ChatML format" 39 | :assistant) 40 | 41 | (def ^:private role-mapping 42 | (let [roles {system :system 43 | user :user 44 | assistant :assistant}] 45 | (merge roles (set/map-invert roles)))) 46 | 47 | (defn chatml->bosquet 48 | [{r :role c :content}] 49 | {role (role-mapping (keyword r)) content c}) 50 | 51 | (defn ->completion 52 | "Build Bosquet completion data structure from 53 | the OAI-shaped responses. 
54 | 55 | Gets only the first of completion `choices`" 56 | [{[{:keys [text message]} & _choices] :choices 57 | {total_tokens :total_tokens 58 | prompt_tokens :prompt_tokens 59 | completion_tokens :completion_tokens} :usage}] 60 | (assoc 61 | (cond 62 | message {wkk/generation-type :chat 63 | wkk/content (chatml->bosquet message)} 64 | text {wkk/generation-type :completion 65 | wkk/content text}) 66 | wkk/usage {:prompt prompt_tokens 67 | :completion completion_tokens 68 | :total total_tokens})) 69 | 70 | (defn completion-fn 71 | "Create completion http client based on passed in parameters. 72 | If `api-endpoint` is specified in the configuration then it will be assumed that 73 | OAI style endpoint URL structure is used. 74 | If we get `api-endpoint-messages` then we use endpoint URL for that specific task 75 | as it is passed in in the config." 76 | [{:keys [api-endpoint 77 | api-endpoint-messages 78 | api-key]}] 79 | (partial http/resilient-post 80 | (if api-endpoint (str api-endpoint "/chat/completions") 81 | api-endpoint-messages) 82 | (when api-key {:oauth-token api-key}))) 83 | 84 | (defn create-completion 85 | "Make a call to OAI API shaped service. 86 | 87 | - `service-cfg` will contain props needed to make call: endpoint, model defaults, etc 88 | - `params` is the main payload of the call containing model params, and prompt in `messages` 89 | - `content` is intended for `complete` workflow where we do not have chat `messages` in `params`" 90 | ([service-cfg params content] 91 | (create-completion service-cfg 92 | (-> params 93 | (assoc :messages content) 94 | (dissoc :prompt)))) 95 | ([{default-params :model-params :as service-cfg} params] 96 | (let [lm-call (completion-fn service-cfg)] 97 | (-> params 98 | (prep-params default-params) 99 | lm-call 100 | ->completion)))) 101 | 102 | (defn chat 103 | [service-cfg params] 104 | (create-completion service-cfg params)) 105 | 106 | (defn complete 107 | [service-cfg {prompt :prompt :as params}] 108 | (create-completion service-cfg params 109 | [{:role :user :content prompt}])) 110 | -------------------------------------------------------------------------------- /src/bosquet/memory/memory.clj: -------------------------------------------------------------------------------- 1 | (ns bosquet.memory.memory 2 | (:require 3 | [bosquet.memory.retrieval :as r] 4 | [bosquet.wkk :as wkk] 5 | [taoensso.timbre :as timbre])) 6 | 7 | ;; https://gentopia.readthedocs.io/en/latest/agent_components.html#long-short-term-memory 8 | ;; Memory component is used for one of the following purposes: 9 | 10 | ;; - Escaping context limitation of LLMs. eg. when you expect a very long 11 | ;; conversation or task solving trajectory, exceeding the max_token limit 12 | ;; of LLMs. 13 | 14 | ;; - Saving token consumption. eg. when you expect to have lengthy and 15 | ;; unnecessary tool response (like Wikipedia Search) stored in-context. 16 | 17 | ;; https://arxiv.org/pdf/2304.03442.pdf 18 | ;; memory stream, a long-term memory module that records, in natural language, 19 | ;; a comprehensive list of the agent’s experiences. 20 | ;; A memory retrieval model combines relevance, recency, and importance to 21 | ;; surface the records needed to inform the agent’s moment-to-moment behavior. 22 | 23 | ;; The memory stream maintains a comprehensive record of the agent’s experience. 24 | ;; It is a list of memory objects, where each object contains a natural language 25 | ;; description, a creation timestamp, and a most recent access timestamp. 
The most basic element 26 | ;; of the memory stream is an observation, which is an event directly 27 | ;; perceived by an agent. 28 | ;; 29 | ;; Components of memory retrieval 30 | ;; - recency 31 | ;; - relevancy 32 | ;; - importance 33 | ;; - reflection 34 | ;; 35 | ;; Encode: Chunking, Semantic, Metadata 36 | ;; Store: Atom, VectorDB 37 | ;; Retrieve: Sequential, Cue, Query 38 | ;; 39 | 40 | (defprotocol Memory 41 | (remember [this observation params]) 42 | (forget [this params]) 43 | (free-recall [this params cue]) 44 | (sequential-recall [this params]) 45 | (cue-recall [this params cue]) 46 | ;; TODO volume calculation should not be a concern of Memory 47 | ;; It can store whatever it can constrained by storage mechanism 48 | ;; what can be used by the memory is defined by generation model 49 | (volume [this opts])) 50 | 51 | ;; Someone who forgets it all. To be used when memory is not needed (default) 52 | (deftype Amnesiac 53 | [] 54 | Memory 55 | (volume [_this _opts]) 56 | (remember [_this _observation _params]) 57 | (forget [_this _params]) 58 | (free-recall [_this _cue _params]) 59 | (sequential-recall [_this _params]) 60 | (cue-recall [_this _cue _params])) 61 | 62 | (defn handle-recall 63 | "Handle memory retrieval. Dispatch to the retrieval method based on `recall-function`. 64 | 65 | In case of an unspecified retrieval method or a not yet initialized memory system 66 | return `context` as memories. `Context` is the current conversation message, generation prompt, 67 | or anything else the AI gen workflow is currently using." 68 | [memory-system recall-function context params] 69 | (if memory-system 70 | (condp = recall-function 71 | r/recall-free (.free-recall memory-system params) 72 | r/recall-sequential (.sequential-recall memory-system params) 73 | r/recall-cue (.cue-recall memory-system params context) 74 | (do 75 | (timbre/warnf "Unknown recall method - '%s'. Using 'context' as memories." recall-function) 76 | context)) 77 | (do 78 | (timbre/warnf "Memory system is not specified. Using 'context' as memories.") 79 | context))) 80 | 81 | (defn available-memories 82 | [{system wkk/memory-system 83 | recall-fn wkk/recall-function 84 | recall-params wkk/recall-parameters} messages] 85 | (if system 86 | (do 87 | (timbre/infof "🧠 Retrieving memories.") 88 | (timbre/info "\t* Memory:" system) 89 | (timbre/info "\t* Recall:" recall-fn) 90 | (timbre/info "\t* Params:" recall-params) 91 | (handle-recall system recall-fn messages recall-params)) 92 | (do 93 | (timbre/info "No memory specified, using available context as memories") 94 | messages))) 95 | -------------------------------------------------------------------------------- /src/bosquet/memory/retrieval.clj: -------------------------------------------------------------------------------- 1 | (ns bosquet.memory.retrieval 2 | (:require 3 | [bosquet.llm.wkk :as wkk] 4 | [bosquet.llm.openai-tokens :as oai.tokenizer] 5 | [taoensso.timbre :as timbre])) 6 | 7 | ;; Memory types are inspired by 8 | ;; https://en.wikipedia.org/wiki/Recall_(memory) 9 | 10 | (def recall-free 11 | "Free recall is a common task in the psychological study of memory. In this task, 12 | participants study a list of items on each trial, and then are prompted to recall the items 13 | in any order." 14 | :memory.recall/free) 15 | 16 | (def recall-sequential 17 | "Serial recall is the ability to recall items or events in the order in which they occurred. 18 | The ability of humans to store items in memory and recall them is important to the use of language."
19 | :memory.recall/sequential) 20 | 21 | (def recall-cue 22 | "Cued recall refers to retrieving information from long-term memory using aids or cues." 23 | :memory.recall/cue) 24 | 25 | (defmulti memory-object-size (fn [_memory-object _model llm] llm)) 26 | 27 | (defmethod memory-object-size 28 | wkk/openai 29 | [memory-object model _llm] 30 | (oai.tokenizer/token-count memory-object model)) 31 | 32 | (defmethod memory-object-size 33 | :default 34 | [memory-object model llm] 35 | (timbre/warnf "No tokenizer for '%s' - '%s'. Using OpenAI tokenization (FIXME)" llm model) 36 | (oai.tokenizer/token-count memory-object model)) 37 | 38 | (def memory-objects-limit 39 | "A limit on how many objects are to be retrieved from the memory. 40 | 41 | Note that it does not deal with tokens. Thus even a single memory 42 | object might be over the token limit" 43 | :memory.retrieval/object-limit) 44 | 45 | (def memory-tokens-limit 46 | "A limit on how many tokens are to be retrieved from the memory across 47 | different memory objects. " 48 | :memory.retrieval/token-limit) 49 | 50 | (def memory-content 51 | :memory.retrieval/content) 52 | 53 | (def content-similarity-threshold 54 | :memory.retrieval/similarity-threshold) 55 | 56 | ;; (defn free-recall-handler [storage _params] 57 | ;; (shuffle (.query storage identity))) 58 | 59 | ;; (defn sequential-recall-handler [storage {limit memory-objects-limit}] 60 | ;; (take-last limit 61 | ;; (.query storage identity))) 62 | 63 | (defn take-while-tokens 64 | [{object-limit memory-objects-limit 65 | token-limit memory-tokens-limit 66 | content-fn memory-content 67 | model wkk/model 68 | llm wkk/service 69 | :or {object-limit 100 70 | token-limit 4000 71 | content-fn identity}} 72 | objects] 73 | (if token-limit 74 | (loop [[object & objects] (reverse (take-last object-limit objects)) 75 | retrieved-objects [] 76 | token-count (memory-object-size (content-fn object) model llm)] 77 | (if (and object (> token-limit token-count)) 78 | (recur 79 | objects 80 | (conj retrieved-objects object) 81 | (+ token-count (memory-object-size (content-fn (first objects)) model llm))) 82 | (reverse retrieved-objects))) 83 | (take-last object-limit objects))) 84 | 85 | ;; (defn cue-recall-handler [{content-fn memory-content-fn 86 | ;; :or {content-fn identity} 87 | ;; :as params} 88 | ;; objects cue] 89 | ;; (let [threshold 0.6] 90 | ;; (take-while-tokens 91 | ;; (filter (fn [item] 92 | ;; (> threshold (nlp/cosine-distance (content-fn item) cue))) 93 | ;; objects) 94 | ;; params))) 95 | 96 | ;; (def handlers 97 | ;; {sequential-recall sequential-recall-handler 98 | ;; free-recall free-recall-handler}) 99 | 100 | ;; (defn handler [retriever-name] 101 | ;; (get handlers retriever-name 102 | ;; ;; default is `sequential-retriever` 103 | ;; sequential-recall-handler)) 104 | -------------------------------------------------------------------------------- /src/bosquet/llm/openai_tokens.clj: -------------------------------------------------------------------------------- 1 | (ns bosquet.llm.openai-tokens 2 | "JTokkit wrapper to get encode/decode and get token counts. 3 | Plus a price estimator for model produced tokens" 4 | (:import 5 | [com.knuddels.jtokkit.api IntArrayList] 6 | [com.knuddels.jtokkit Encodings])) 7 | 8 | ;; 9 | ;; ## Encodings and token counting 10 | ;; 11 | 12 | (defonce registry (Encodings/newDefaultEncodingRegistry)) 13 | 14 | (defn encoding 15 | "Get encoding by model name. 
Name is provided as keyword matching names specified in 16 | - https://platform.openai.com/docs/models/overview 17 | 18 | If model name is not found it will throw `NoSuchElementException` exception. " 19 | [model] 20 | (.get (.getEncodingForModel registry (name model)))) 21 | 22 | (defn encode 23 | "Encode `text` using `model` (:gpt-4, :gpt-3.5-turbo)" 24 | [text model] 25 | (-> model encoding (.encode text) .toArray vec)) 26 | 27 | (defn decode 28 | "Encode `tokens` using `model`" 29 | [tokens model] 30 | (.decode (encoding model) 31 | (reduce (fn [m token] 32 | (.add m token) 33 | m) 34 | (IntArrayList. (count tokens)) 35 | tokens))) 36 | 37 | (defn token-count 38 | "Count tokens in the `text` using `model` for token production. " 39 | [text model] 40 | (count (encode text model))) 41 | 42 | (comment 43 | (def text "Some random text") 44 | (token-count text :gpt-4) 45 | (decode (encode "Small test" :gpt-4) :gpt-4) 46 | #__) 47 | 48 | ;; 49 | ;; ## Pricing 50 | ;; 51 | 52 | (def pricing 53 | "OAI model prices (per 1k tokens) and token limits specified in a map: 54 | - `input` price for the prompt tokens 55 | - `output` price for the completion tokens 56 | - `tokens` max context tokens the model supports" 57 | 58 | {:gpt-4 {:input 0.003 59 | :output 0.006 60 | :tokens 8192} 61 | :gpt-3.5-turbo-16k {:input 0.003 62 | :output 0.004 63 | :tokens 16384} 64 | :gpt-3.5-turbo {:input 0.0015 65 | :output 0.002 66 | :tokens 4096} 67 | :text-embedding-ada-002 {:embeddings 0.0001} 68 | :text-babbage-002 {:input 0.0004 69 | :output 0.0004 70 | :tokens 16384} 71 | :text-curie-001 {:input 0.002 72 | :output 0.002 73 | :tokens 2049} 74 | :text-davinci-003 {:input 0.02 75 | :output 0.02 76 | :tokens 4097}}) 77 | 78 | (defn- calc-price 79 | ([input-price input model] 80 | (calc-price input-price 0 81 | input (if (string? input) "" 0) 82 | model)) 83 | ([input-price output-price input output model] 84 | (cond 85 | (and (string? input) (string? output)) 86 | (+ 87 | (* (token-count input model) input-price) 88 | (* (token-count output model) output-price)) 89 | ;; If we have got numbers it must be token counts already 90 | (and (number? input) (number? output)) 91 | (+ 92 | (* input input-price) 93 | (* output output-price))))) 94 | 95 | (defn generation-price-estimate 96 | "Estimate price for the `prompt` and `completion` using `model`. 97 | If `prompt` and `completion` are strings it will count tokens first. 98 | If `prompt` and `completion` are numbers it will assume they are token counts" 99 | ([prompt model] (generation-price-estimate prompt "" model)) 100 | ([prompt completion model] 101 | (let [{:keys [input output]} (model pricing)] 102 | (calc-price input output prompt completion model)))) 103 | 104 | (defn embeddings-price-estimate 105 | ([text model] (calc-price 106 | (get-in pricing [model :embeddings]) 107 | text model)) 108 | ([text] (embeddings-price-estimate text :text-embedding-ada-002))) 109 | 110 | (defn fits-in-context-window? [token-count model] 111 | (>= (get-in pricing [model :tokens]) token-count)) 112 | -------------------------------------------------------------------------------- /.github/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | When contributing to this repository, please first discuss the change you wish to make via issue, 4 | email, or any other method with the owners of this repository before making a change. 5 | 6 | ## Pull Request Process 7 | 8 | 1. 
Ensure any install or build dependencies are removed before the end of the layer when doing a 9 | build. 10 | 2. Update the README.md with details of changes to the interface, this includes new environment 11 | variables, exposed ports, useful file locations and container parameters. 12 | 3. Increase the version numbers in any examples files and the README.md to the new version that this 13 | Pull Request would represent. The versioning scheme we use is [SemVer](http://semver.org/). 14 | 4. You may merge the Pull Request in once you have the sign-off of other developer. 15 | 16 | ## Code of Conduct 17 | 18 | ### Our Pledge 19 | 20 | In the interest of fostering an open and welcoming environment, we as 21 | contributors and maintainers pledge to making participation in our project and 22 | our community a harassment-free experience for everyone, regardless of age, body 23 | size, disability, ethnicity, gender identity and expression, level of experience, 24 | nationality, personal appearance, race, religion, or sexual identity and 25 | orientation. 26 | 27 | ### Our Standards 28 | 29 | Examples of behavior that contributes to creating a positive environment 30 | include: 31 | 32 | * Using welcoming and inclusive language 33 | * Being respectful of differing viewpoints and experiences 34 | * Gracefully accepting constructive criticism 35 | * Focusing on what is best for the community 36 | * Showing empathy towards other community members 37 | 38 | Examples of unacceptable behavior by participants include: 39 | 40 | * The use of sexualized language or imagery and unwelcome sexual attention or 41 | advances 42 | * Trolling, insulting/derogatory comments, and personal or political attacks 43 | * Public or private harassment 44 | * Publishing others' private information, such as a physical or electronic 45 | address, without explicit permission 46 | * Other conduct which could reasonably be considered inappropriate in a 47 | professional setting 48 | 49 | ### Our Responsibilities 50 | 51 | Project maintainers are responsible for clarifying the standards of acceptable 52 | behavior and are expected to take appropriate and fair corrective action in 53 | response to any instances of unacceptable behavior. 54 | 55 | Project maintainers have the right and responsibility to remove, edit, or 56 | reject comments, commits, code, wiki edits, issues, and other contributions 57 | that are not aligned to this Code of Conduct, or to ban temporarily or 58 | permanently any contributor for other behaviors that they deem inappropriate, 59 | threatening, offensive, or harmful. 60 | 61 | ### Scope 62 | 63 | This Code of Conduct applies both within project spaces and in public spaces 64 | when an individual is representing the project or its community. Examples of 65 | representing a project or community include using an official project e-mail 66 | address, posting via an official social media account, or acting as an appointed 67 | representative at an online or offline event. Representation of a project may be 68 | further defined and clarified by project maintainers. 69 | 70 | ### Enforcement 71 | 72 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 73 | reported by contacting the project team at zygis@hey.com. All 74 | complaints will be reviewed and investigated and will result in a response that 75 | is deemed necessary and appropriate to the circumstances. The project team is 76 | obligated to maintain confidentiality with regard to the reporter of an incident. 
77 | Further details of specific enforcement policies may be posted separately. 78 | 79 | Project maintainers who do not follow or enforce the Code of Conduct in good 80 | faith may face temporary or permanent repercussions as determined by other 81 | members of the project's leadership. 82 | 83 | ### Attribution 84 | 85 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 86 | available at [http://contributor-covenant.org/version/1/4][version] 87 | 88 | [homepage]: http://contributor-covenant.org 89 | [version]: http://contributor-covenant.org/version/1/4/ 90 | -------------------------------------------------------------------------------- /src/bosquet/agent/react.clj: -------------------------------------------------------------------------------- 1 | (ns bosquet.agent.react 2 | (:require 3 | [bosquet.agent.tool :as t] 4 | [bosquet.llm.generator :as gen] 5 | [bosquet.template.read :as template])) 6 | 7 | #_(timbre/merge-config! 8 | {:appenders {:println {:enabled? false} 9 | :spit (appenders/spit-appender {:fname "bosquet.log"})}}) 10 | 11 | #_(defn generate-thoughts 12 | "Generate ReAct thoughts. 13 | `ctx` has needed data points 14 | `prompt-palette` has all the prompts needed for ReAct 15 | `prompt-key` is the key to the prompt to be used to start the generation." 16 | [{:bosquet/keys [task-prompts services]} ctx prompt-key] 17 | (let [{{:react/keys [memory thought action]} 18 | gen/completions} (gen/generate services task-prompts ctx)] 19 | 20 | (tap> {:reasoning-trace trace 21 | :thoughts thoughts 22 | :all x}) 23 | 24 | ;; :resoning-trace will contain only the thoughts from before, 25 | ;; most recent observation goes into :thoughts 26 | {:reasoning-trace trace 27 | :thoughts thoughts})) 28 | 29 | (defn focus-on-observation 30 | "Get the sentence a the position `lookup-index` from the observation." 31 | [{:keys [lookup-db lookup-index]}] 32 | ;; last is the position of the sentence in the tuple 33 | (last (get lookup-db lookup-index))) 34 | 35 | (defn solve-task 36 | "Solve a task using [ReAct](https://react-lm.github.io) 37 | 38 | First `agent` parameter specifies which tool will be used to solve the task. 39 | Second context parameter gives initialization data to start working 40 | - `task` is a quesiton ar task formulation for the agent 41 | - `max-steps` specifies how many thinking steps agent is allowed to do 42 | it either reaches that number of steps or 'Finish' action, and then terminates." 43 | [{:bosquet/keys [_max-steps tools task-prompts]} task] 44 | (let [tool (first tools) 45 | initial-ctx {:react/task task 46 | :react/step 1} 47 | _ (t/print-thought 48 | (format "'%s' tool has the following task" (t/my-name tool)) task) 49 | {x gen/completions} (gen/generate task-prompts initial-ctx)] 50 | (tap> x) 51 | #_(loop [step 1 52 | ctx initial-ctx 53 | thoughts thoughts 54 | reasoning-trace reasoning-trace] 55 | (let [{:keys [action thought parameters] :as action-ctx} 56 | (mind-reader/find-action step thoughts) 57 | ctx (merge ctx action-ctx {:step step}) 58 | _ (t/print-indexed-step "Thought" thought step) 59 | _ (t/print-action action parameters step) 60 | observation (t/call-tool tool action ctx)] 61 | (cond 62 | ;; Tool failed to find a solution in max steps allocated 63 | (= step max-steps) 64 | (do 65 | (t/print-too-much-thinking-error step) 66 | nil) 67 | 68 | ;; Tool got to the solution. 
Print and return it 69 | (= :finish action) 70 | (do 71 | (t/print-result observation) 72 | observation) 73 | 74 | ;; Continue thinking 75 | :else 76 | (let [current-observation (focus-on-observation observation) 77 | _ (t/print-indexed-step "Observation" current-observation step) 78 | {:keys [thoughts reasoning-trace]} 79 | (generate-thoughts 80 | cfg 81 | {:react/step (inc step) 82 | :react/reasoning-trace (str reasoning-trace thought) 83 | :react/observation current-observation} 84 | :react/step-n)] 85 | (recur (inc step) ctx thoughts reasoning-trace))))))) 86 | 87 | (comment 88 | (import '[bosquet.agent.wikipedia Wikipedia]) 89 | (def prompt-palette (template/load-palettes "resources/prompt-palette/agent")) 90 | (def question 91 | "Author David Chanoff has collaborated with a U.S. Navy admiral who served as the ambassador to the United Kingdom under which President?") 92 | 93 | (solve-task 94 | {:bosquet/tools [(Wikipedia.)] 95 | :bosquet/task-prompts prompt-palette 96 | :bosquet/max-steps 5} 97 | question) 98 | #__) 99 | -------------------------------------------------------------------------------- /src/bosquet/eval/evaluator.clj: -------------------------------------------------------------------------------- 1 | (ns bosquet.eval.evaluator 2 | (:require 3 | [bosquet.llm.generator :as gen] 4 | [bosquet.memory.long-term-memory] 5 | [bosquet.nlp.splitter :as splitter] 6 | [bosquet.read.document :as document] 7 | [bosquet.wkk :as wkk] 8 | [taoensso.timbre :as timbre])) 9 | 10 | ;; Prompts taken from 11 | ;; https://github.com/run-llama/llama_index/blob/f065b6c103677b33990cdd3054a7918c0fe793f8/llama_index/evaluation/correctness.py 12 | 13 | (def eval-prompt 14 | {:system ["You are an expert question answering evaluation system." 15 | 16 | "You are given the following information:" 17 | "- a user query," 18 | "- a reference answer, and" 19 | "- a generated answer." 20 | "" 21 | "Your job is to judge the relevance and correctness of the generated answer." 22 | "Output a single score that represents a holistic evaluation." 23 | "You must return your response in a line with only the score." 24 | "Do not return answers in any other format." 25 | "On a separate line provide your reasoning for the score as well." 26 | "" 27 | "Follow these guidelines for scoring:" 28 | "- Your score has to be between 1 and 5, where 1 is the worst and 5 is the best." 29 | "- If the generated answer is not relevant to the user query, you should give a score of 1." 30 | "- If the generated answer is relevant but contains mistakes, you should give a score between 2 and 3." 31 | "- If the generated answer is relevant and fully correct, you should give a score between 4 and 5." 
32 | "" 33 | "Example Response:" 34 | "Score: 4.0" 35 | "Explanation: The generated answer has the exact same metrics as the reference answer," 36 | "but it is not as concise."] 37 | :eval ["{{system}}" 38 | "" 39 | "## User Query" 40 | "{{query}}" 41 | "" 42 | "## Reference Answer" 43 | "{{reference-answer}}" 44 | "" 45 | "## Generated Answer" 46 | "{{generated-answer}}" 47 | "" 48 | "## Evaluation" 49 | "{{score}}"] 50 | :score (gen/llm :gpt-4)}) 51 | 52 | (defn evaluate-answer 53 | [question reference-answer generated-answer] 54 | (try 55 | (let [resp (gen/generate 56 | eval-prompt 57 | {:query question 58 | :reference-answer reference-answer 59 | :generated-answer generated-answer} 60 | {:score wkk/gpt4-turbo-with-cache})] 61 | (->> resp 62 | :score 63 | (re-find #"Score: (.*)") 64 | second 65 | (Double/parseDouble))) 66 | (catch Exception e 67 | (timbre/errorf "Failed to parse evaluation answer - %s" (ex-message e))))) 68 | 69 | ;; TODO this does not belong here 70 | (defn store-knowledge 71 | [{collection-name :collection-name :as opts} 72 | memory knowledge] 73 | (let [chunks (splitter/chunk-text 74 | {splitter/chunk-size 20 splitter/split-unit splitter/sentence} 75 | knowledge) 76 | _ (timbre/debugf "Got %s cunks to remember" (count chunks))] 77 | (.forget memory opts) 78 | ;; FIXME 79 | (.create nil #_storage collection-name) 80 | (doseq [chunk chunks] 81 | (.remember memory chunk opts)))) 82 | 83 | ;; TODO this does not belong here 84 | (defn query 85 | [opts memory query] 86 | (-> 87 | (.cue-recall memory query opts) 88 | first :payload :text)) 89 | 90 | (comment 91 | #_(import 'bosquet.db.qdrant.Qdrant) 92 | #_(import 'bosquet.nlp.embeddings.OAIEmbeddings) 93 | (def opts {:collection-name "llama2-qna-eval"}) 94 | #_(def memory (LongTermMemory. 95 | (Qdrant. (:qdrant env/config)) 96 | (OAIEmbeddings. (:openai env/config)))) 97 | (def text (:text (document/parse "data/llama2.pdf"))) 98 | 99 | #_(store-knowledge opts memory text) 100 | #_(query opts memory "What are the inputs and outputs to Reward Modeling?") 101 | 102 | (evaluate-answer 103 | "What are the inputs and outputs to Reward Modeling?" 104 | "The reward model takes a model response and its corresponding prompt as inputs. It outputs a scalar score to indicate the quality of the model generation." 105 | "Inputs: response. Outputs: score") 106 | 107 | #__) 108 | -------------------------------------------------------------------------------- /src/bosquet/env.clj: -------------------------------------------------------------------------------- 1 | (ns bosquet.env 2 | (:refer-clojure :exclude [val]) 3 | (:require 4 | [aero.core :as aero] 5 | [bosquet.llm.wkk :as wkk] 6 | [bosquet.utils :as u] 7 | [clojure.java.io :as io] 8 | [taoensso.timbre :as log] 9 | [bosquet.mcp.core :as mcp])) 10 | 11 | (defn exists? [file] (.exists file)) 12 | 13 | (defn bosquet-cfg-file 14 | "Get Bosquet config file (secrets.edn or config.edn). First check project root 15 | then go to ~/.bosquet" 16 | [cfg-file-name] 17 | (let [local-file (io/file (str "./" cfg-file-name)) 18 | home-dir-file (io/file (System/getProperty "user.home") 19 | (str ".bosquet/" cfg-file-name))] 20 | (cond 21 | (exists? local-file) local-file 22 | (exists? home-dir-file) home-dir-file 23 | :else (do 24 | (spit local-file "{}") 25 | local-file)))) 26 | 27 | (def config-file 28 | "Config file to override `env.edn` or add new components: LLM providers, memory, tools." 
29 | (bosquet-cfg-file "config.edn")) 30 | 31 | (def secrets-file 32 | "API keys and other things not to be shared" 33 | (bosquet-cfg-file "secrets.edn")) 34 | 35 | (defmethod aero/reader 'mmerge 36 | [_opts _tag value] 37 | (apply merge-with merge value)) 38 | 39 | (defmethod aero/reader 'include-config 40 | [_opts _tag value] 41 | (let [cfg-file (bosquet-cfg-file value) 42 | config (if cfg-file (u/read-edn-file cfg-file) {})] 43 | (when (empty? config) 44 | (log/infof "No '%s' configuration, using defaults if applicable." 45 | value)) 46 | config)) 47 | 48 | (defn- read-edn 49 | [file] 50 | (if (and file (.exists file)) 51 | (-> file slurp read-string) 52 | {})) 53 | 54 | (def config 55 | (aero/read-config (io/resource "env.edn"))) 56 | 57 | (def model-providers 58 | "A list of model names supported by this service. It is an 59 | optional data point that allows a shortcut when defining LLM 60 | calls with (generator/llm) function. Instead of 61 | `(llm :openai :model-params {:model :gpt-3.5})` 62 | a shorthand of `(llm :gpt-3.5)` will work" 63 | (reduce-kv (fn [m k {:keys [model-names chat-fn complete-fn]}] 64 | ;; The IF is a product of not separating llm definitions 65 | ;; from other stuff like QDRANT in edn.env 66 | ;; It does not hurt to have qdrant def being processed here 67 | ;; but it would add junk 68 | (if (or chat-fn complete-fn) 69 | (reduce (fn [model-mapping model-name] 70 | (assoc model-mapping model-name k)) 71 | m model-names) 72 | m)) 73 | {} 74 | config)) 75 | 76 | (defn val 77 | "Get configuration at path" 78 | [& path] 79 | (get-in config path)) 80 | 81 | (defn- merge-config [cfg conf-path value] 82 | (merge cfg (assoc-in cfg conf-path value))) 83 | 84 | (defn- update-props-file 85 | [file conf-path value] 86 | (let [cfg (read-edn file)] 87 | (try 88 | (io/make-parents file) 89 | (spit file 90 | (-> cfg 91 | (merge-config conf-path value) 92 | u/pp-str)) 93 | (catch Exception ex 94 | (println "Failed to update config file: " (.getMessage ex)) 95 | (println "Restoring config.") 96 | (spit file cfg))))) 97 | 98 | (def update-config-file 99 | (partial update-props-file config-file)) 100 | 101 | (def update-secrets-file 102 | (partial update-props-file secrets-file)) 103 | 104 | (defn configured-api-keys 105 | "Get a list of keys set in `secrects.edn`" 106 | [] 107 | (-> secrets-file (read-edn) keys)) 108 | 109 | (defn default-service 110 | "Get default LLM service as defiened in config.edn. 111 | In case default is not defined, fall back to OpenAI" 112 | [] 113 | (let [default-llm (:default-llm config) 114 | default-llm (if default-llm 115 | default-llm 116 | (-> config :openai :model-params))] 117 | {wkk/service (-> default-llm :model model-providers) 118 | wkk/model-params (dissoc default-llm 119 | :service :default-for-models 120 | :api-key :api-endpoint :impl 121 | wkk/service wkk/chat-fn wkk/complete-fn wkk/embed-fn)})) 122 | 123 | (defn initialize-mcp-tools 124 | "Initialize mcp tools as defined in config.edn. 125 | Once initialized all the tool definitions are included in the bosquet.mcp.tools namespace. 126 | You can then pass the tool symbols in wkk/tools or use any of the functions in the llm/tools 127 | namespace to extract metadata etc" 128 | [] 129 | (when-let [mcp-servers (:mcp-servers config)] 130 | (mcp/initialize-mcp-servers! 
mcp-servers))) 131 | -------------------------------------------------------------------------------- /src/bosquet/nlp/splitter.clj: -------------------------------------------------------------------------------- 1 | (ns bosquet.nlp.splitter 2 | (:require 3 | [bosquet.llm.openai-tokens :as oai] 4 | [clojure.java.io :as io] 5 | [clojure.string :as string] 6 | [taoensso.timbre :as timbre]) 7 | (:import 8 | [opennlp.tools.sentdetect SentenceDetectorME SentenceModel])) 9 | 10 | (def chunk-size 11 | "Size of the chunks into which the text gets split." 12 | :splitter/chunk-size) 13 | 14 | (def split-unit 15 | "Lexical units in which the text gets split: character, token, sentence." 16 | :splitter/unit) 17 | 18 | (def overlap 19 | "Number of units by which chunks can overlap." 20 | :splitter/overlap) 21 | 22 | (def model 23 | "Model used for tokenization." 24 | :splitter/model) 25 | 26 | (def sentence 27 | "Text splitter by sentences. It will use the OpenNLP sentence splitter to partition 28 | the text." 29 | :splitter/sentence-split) 30 | 31 | (def character 32 | "Text splitter by individual characters." 33 | :splitter/character-split) 34 | 35 | (def token 36 | "Text splitter by tokens. Tokenization is done based on the provided model." 37 | :splitter/token-split) 38 | 39 | (defn- load-splitter-model 40 | "Load OpenNLP model for `lang` sentence boundary detection. 41 | 42 | See https://opennlp.apache.org/models.html" 43 | [lang] 44 | (let [model-file (io/file (format "models/lang/%s/sentence-detector.bin" (name lang)))] 45 | (if (.exists model-file) 46 | (SentenceDetectorME. (SentenceModel. model-file)) 47 | (timbre/errorf 48 | "‼️ Sentence detection model file is not found. Use `bb lang:sent:%s` to download." 49 | (name lang))))) 50 | 51 | (defn- text-units-length 52 | [units] 53 | (reduce (fn [cnt chunk] (+ cnt (count chunk))) 0 units)) 54 | 55 | (defn text-splitter 56 | [{:splitter/keys [chunk-size overlap] 57 | :or {overlap 0}} 58 | text-units] 59 | (let [unit-count (count text-units)] 60 | (loop [chunks [] 61 | current-pos (text-units-length chunks)] 62 | (if (> current-pos unit-count) 63 | chunks 64 | (recur 65 | (conj chunks 66 | (subvec text-units 67 | (max (- current-pos overlap) 0) 68 | (min (+ (- current-pos 69 | ;; can't figure a nicer way, this if is needed 70 | ;; to avoid getting the first chunk made shorter by overlap 71 | (if (zero? current-pos) 0 overlap)) chunk-size) 72 | unit-count))) 73 | (+ current-pos (- chunk-size 74 | (if (zero? current-pos) 0 overlap)))))))) 75 | 76 | (defn- text->sentences 77 | "Split `text` into sentences using the OpenNLP sentence splitting model" 78 | [{:keys [lang] :or {lang :en}} text] 79 | (-> lang 80 | load-splitter-model 81 | (.sentDetect text) 82 | vec)) 83 | 84 | (defn- text<-sentences 85 | [_opts sentences] 86 | (string/join " " sentences)) 87 | 88 | (defn- text->characters 89 | [_opts text] 90 | (vec (map identity text))) 91 | 92 | (defn- text<-characters 93 | [_opts chars] 94 | (string/join chars)) 95 | 96 | (defn- text->tokens [{:splitter/keys [model]} text] 97 | (vec (oai/encode text model))) 98 | 99 | (defn- text<-tokens [{:splitter/keys [model]} text] 100 | (oai/decode text model)) 101 | 102 | (def split-handlers 103 | "Split handlers are needed to turn text into the specified text units via the `encode` function. 104 | The `decode` function will turn those units back into a single text string."
105 | {sentence {:encode text->sentences 106 | :decode text<-sentences} 107 | character {:encode text->characters 108 | :decode text<-characters} 109 | token {:encode text->tokens 110 | :decode text<-tokens}}) 111 | 112 | (defn chunk-text 113 | "Chunk `text` into `chunk-size` blocks using specified `splitter`. Optionaly 114 | `overlap` can be specified by how many text units chunks can overap (defaults to 0). 115 | 116 | Supported text splitters: 117 | - `sentence-splitter` 118 | - `character-splitter` 119 | - `token-splitter`" 120 | [{:splitter/keys [unit] :as opts} text] 121 | (let [{:keys [encode decode]} (split-handlers unit) 122 | encode (partial encode opts) 123 | decode (partial decode opts)] 124 | (->> text 125 | encode 126 | (text-splitter opts) 127 | (map decode)))) 128 | 129 | (comment 130 | 131 | (def text (slurp "https://raw.githubusercontent.com/scicloj/scicloj.ml.smile/main/LICENSE")) 132 | 133 | (text->sentences nil text) 134 | 135 | (tap> 136 | (chunk-text {chunk-size 3 split-unit sentence} text)) 137 | 138 | (chunk-text {chunk-size 10 split-unit token model :gpt-4} 139 | "Think not, is my eleventh commandment; and sleep when you can, is my twelfth.") 140 | #__) 141 | -------------------------------------------------------------------------------- /src/bosquet/template/selmer.clj: -------------------------------------------------------------------------------- 1 | (ns bosquet.template.selmer 2 | (:require 3 | [clojure.set :as set] 4 | [clojure.string :as string] 5 | [selmer.filter-parser :refer [literal? split-value]] 6 | [selmer.parser :as selmer] 7 | [selmer.tags :as tag] 8 | [selmer.util :as util])) 9 | 10 | ;; 11 | ;; Copy/Paste from Selmer 12 | ;; Enables `known-variables-in-order` 13 | ;; 14 | 15 | (defn- parse-variable-paths 16 | " 17 | takes in vals like: \"person.name|capitalize\" 18 | and produces [:person :name] 19 | " 20 | [arg] 21 | (some-> arg split-value first util/parse-accessor)) 22 | 23 | (defn ^:private parse-variables [tags] 24 | (loop [vars [] ; Selmer uses 'set' 25 | nested-keys [] ; Selmer uses 'set' 26 | tags tags] 27 | (if-let [{:keys [tag-type tag-name tag-value args]} (first tags)] 28 | (cond 29 | (= :filter tag-type) (let [v (parse-variable-paths tag-value) 30 | should-add-var? (when (vector? v) 31 | (not (contains? nested-keys (first v)))) 32 | updated-vars (cond-> vars 33 | should-add-var? (conj v))] 34 | (recur 35 | updated-vars 36 | nested-keys 37 | (rest tags))) 38 | (= :for tag-name) (let [[ids [_ items]] (tag/aggregate-args args)] 39 | (recur 40 | (conj vars (parse-variable-paths items)) 41 | (conj (set (map keyword ids)) :forloop) 42 | (rest tags))) 43 | 44 | (= :with tag-name) (let [[id value] (string/split (first args) #"=")] 45 | (recur 46 | (conj vars (parse-variable-paths value)) 47 | #{(keyword id)} 48 | (rest tags))) 49 | 50 | (contains? #{:endfor :endwith} tag-name) (recur vars #{} (rest tags)) 51 | 52 | :else 53 | (let [special-syms #{nil :not :all :any :< :> := :<= :>=} 54 | should-remove? (fn [[var-head]] 55 | (or 56 | (special-syms var-head) 57 | (nested-keys var-head)))] 58 | (recur (set/union 59 | vars 60 | (->> args 61 | (filter (complement literal?)) 62 | (map parse-variable-paths) 63 | (remove should-remove?) 
64 | set)) 65 | nested-keys 66 | (rest tags)))) 67 | vars))) 68 | 69 | ;; --- 70 | 71 | (defn known-variables-in-order 72 | "The same as Selmer's `known-variables` but do not produce 73 | set thus loosing the order" 74 | [input & [opts]] 75 | (->> (or opts {}) 76 | (selmer/parse selmer/parse-input (java.io.StringReader. input)) 77 | meta 78 | :all-tags 79 | parse-variables 80 | (map first))) 81 | 82 | (defn known-variables 83 | "Known variables in the `template` or templates" 84 | [template] 85 | (let [xf (comp 86 | (remove nil?) 87 | (mapcat (fn [template] 88 | (when-not (map? template) 89 | (selmer/known-variables template)))))] 90 | (into #{} xf (if (string? template) [template] template)))) 91 | 92 | (defn render [text ctx] 93 | (let [text (str text)] 94 | (if (string/blank? text) 95 | text 96 | (util/without-escaping 97 | (selmer/render text ctx))))) 98 | 99 | (defn missing-value-noop [tag _context-map] 100 | (format "{{%s}}" (:tag-value tag))) 101 | 102 | (defn set-missing-value-formatter 103 | "Since some of the slots are AI-generated later in the process, 104 | do not touch slots that have no date in parsing context." 105 | [] 106 | (util/set-missing-value-formatter! missing-value-noop)) 107 | 108 | (defn- kw->str 109 | [kw] 110 | (let [ns (namespace kw) 111 | name (name kw)] 112 | (string/replace 113 | (if ns (str ns "/" name) name) 114 | ;; ecape '.' for Selmer 115 | "." ".."))) 116 | 117 | (defn clear-gen-var-slot 118 | "Remove a `slot` reference from the `template` and 119 | all the text after it. This is to enforce the generation 120 | context up to the generation slot. 121 | 122 | `template` = '{{x}}^2 = {{y}} further text' 123 | `slot` = '{{y}}' 124 | => '{{x}}^2 = '" 125 | [template slot] 126 | (string/replace 127 | template 128 | (->> slot kw->str (format "\\{\\{%s\\}\\}.*") re-pattern) 129 | "")) 130 | 131 | (defn append-slot 132 | [template slot] 133 | (format "%s {{%s}}" template (kw->str slot))) 134 | -------------------------------------------------------------------------------- /src/bosquet/cli.clj: -------------------------------------------------------------------------------- 1 | (ns bosquet.cli 2 | (:require 3 | [bosquet.env :as env] 4 | [bosquet.llm.generator :as gen] 5 | [bosquet.llm.http :as http] 6 | [bosquet.template.read :as read] 7 | [clojure.java.io :as io] 8 | [clojure.pprint :as pp] 9 | [clojure.string :as string] 10 | [clojure.tools.cli :refer [parse-opts]] 11 | [taoensso.timbre :as timbre]) 12 | (:gen-class)) 13 | 14 | (def cli-options 15 | [["-p" "--prompt-file PROMPT-FILE" "File containing either chat, graph, or plain string prompt" 16 | :validate [#(.exists (io/file %)) "Prompt file is not found."]] 17 | ["-d" "--data-file DATA-FILE" "File containing context data for the prompts" 18 | :validate [#(.exists (io/file %)) "Data file is not found."]] 19 | [nil "--model MODEL" "Model name" 20 | :parse-fn keyword] 21 | [nil "--max-tokens NUMBER" "Max tokens to generate" 22 | :default 300 23 | :parse-fn #(Integer/parseInt %) 24 | :validate [pos? 
"Max tokens must be > 0"]] 25 | [nil "--temperature TEMP" "Generation temerature" 26 | :id :temperature 27 | :default 0 28 | :parse-fn #(Float/parseFloat %) 29 | :validate [#(<= 0 % 1) "Temperature value must be between 0.0 and 1.0"]] 30 | ["-s" "--service SERVICE" "LLM service provider" 31 | :id :service 32 | :parse-fn keyword] 33 | [nil "--proxy" "Use localy configured (localhost:8080) proxy for request/response logging"] 34 | [nil "--proxy-host HOST" "Hostname for the proxy"] 35 | [nil "--proxy-port PORT" "Port for the proxy"] 36 | [nil "--keystore-password PSW" "Password to Bosquet keystore (defaults to 'changeit')"] 37 | 38 | ["-h" "--help" nil]]) 39 | 40 | (defn usage 41 | [summary] 42 | (->> ["Bosquet CLI tool. Run LLM generations based on suplied prompts and data." 43 | "" 44 | "Usage: bllm action-or-prompt [options]" 45 | "" 46 | "Options:" 47 | summary 48 | "" 49 | "Management actions:" 50 | "" 51 | " keys manage LLM service keys" 52 | " - set [service name] set a key for a given serivice" 53 | " - list list registered services with keys" 54 | " llms manage model parameters" 55 | " - set set default model parameters" 56 | " - defaults show model defaults" 57 | " - list show supported LLM services" 58 | "" 59 | "Generation actions:" 60 | "" 61 | " \"prompt string\" running with prompt string will trigger generation using default model" 62 | "" 63 | "Please refer to https://github.com/zmedelis/bosquet for more information."] 64 | (string/join \newline) 65 | println)) 66 | 67 | (defn- read-input [label] 68 | (printf "%s: " (name label)) 69 | (flush) 70 | (read-line)) 71 | 72 | (defn- set-key [llm-name] 73 | (print "Enter key:") 74 | (flush) 75 | (when-let [api-key (String. (.readPassword (System/console)))] 76 | (env/update-secrets-file [llm-name :api-key] api-key))) 77 | 78 | (defn- list-set-keys [] 79 | (doseq [llm (env/configured-api-keys)] 80 | (println (name llm)))) 81 | 82 | (defn- config-path 83 | [] 84 | (println (str env/config-file))) 85 | 86 | (defn show-defaults 87 | [] 88 | (println (env/default-service))) 89 | 90 | (defn- set-default [options] 91 | (env/update-config-file [:default-llm] options) 92 | (println "Defaults:") 93 | (show-defaults)) 94 | 95 | (defn- list-llms 96 | [] 97 | (doseq [llm [:openai :openai-azure :cohere :lmstudio :mistral]] 98 | (println (name llm)))) 99 | 100 | (defn- collect-data 101 | "Ask user to enter data in the console prompt" 102 | [prompts] 103 | (loop [m {} 104 | [slot & slots] (read/data-slots prompts)] 105 | (if slot 106 | (recur (assoc m slot (read-input slot)) slots) 107 | m))) 108 | 109 | (defn- call-llm 110 | "Do the call to LLM and print out the results" 111 | [prompt {:keys [prompt-file data-file proxy proxy-host proxy-port keystore-password] 112 | :or {keystore-password "changeit"}}] 113 | (when proxy (http/use-local-proxy)) 114 | (when (and proxy-host proxy-port) (http/use-local-proxy proxy-host proxy-port keystore-password)) 115 | (let [prompts (if prompt prompt (-> prompt-file slurp read-string)) 116 | user-data (if data-file 117 | (-> data-file slurp read-string) 118 | (collect-data prompts))] 119 | (if prompt-file 120 | (doseq [data (if (vector? 
user-data) user-data [user-data])] 121 | (pp/pprint (gen/generate prompts data))) 122 | (println (gen/generate prompt user-data))))) 123 | 124 | (defn- action [options arguments] 125 | (let [[action arg param & _rest] (map keyword arguments)] 126 | (condp = action 127 | :llms (condp = arg 128 | :set (set-default options) 129 | :defaults (show-defaults) 130 | :list (list-llms) 131 | (list-llms)) 132 | :keys (condp = arg 133 | :set (set-key param) 134 | :list (list-set-keys) 135 | :path config-path 136 | (list-set-keys)) 137 | (call-llm (first arguments) options)))) 138 | 139 | (defn -main [& args] 140 | (timbre/set-min-level! :error) 141 | (let [{:keys [options arguments errors summary]} (parse-opts args cli-options)] 142 | (cond 143 | (seq errors) (doseq [err errors] (println err)) 144 | (:prompt-file options) (action options arguments) 145 | (or (empty? arguments) 146 | (:help options)) (usage summary) 147 | :else (action options arguments)))) 148 | -------------------------------------------------------------------------------- /notebook/papers/chain_of_density.clj: -------------------------------------------------------------------------------- 1 | ^{:nextjournal.clerk/visibility {:code :hide}} 2 | (ns papers.chain-of-density 3 | (:require 4 | [bosquet.llm.generator :as g] 5 | [bosquet.llm.wkk :as wkk] 6 | [nextjournal.clerk :as clerk])) 7 | 8 | ;; ## Chain of Density prompting 9 | ;; 10 | ;; The Chain of Density (CoD) technique is introduced in the [GPT-4 Summarization with Chain of Density Prompting](https://arxiv.org/pdf/2309.04269.pdf) paper. 11 | ;; It aims to produce high-quality, information-dense text summaries. 12 | 13 | ;; > Selecting the “right” amount of information to include in a summary is a difficult task. A good summary should be detailed 14 | ;; > and entity-centric without being overly dense and hard to follow. 15 | ;; 16 | ;; ![CoD](notebook/assets/cod.png) 17 | ;; 18 | ;; CoD constructs a prompt that iteratively adds not yet summarized entities to the summary while keeping the overall summary length constant. 19 | ;; As it goes through the iterations, it produces increasingly dense summaries. Initial summaries are too sparse, while the final ones are 20 | ;; usually too dense. The second and third versions tend to be the best ones. 21 | 22 | ;; Another nice feature of the CoD prompt is that it keeps all the summary iterations alongside the key entities added to the summary in 23 | ;; a generated JSON output. This allows us to inspect the intermediate steps of the summary generation, see how the summary is evolving, 24 | ;; and choose the best one. 25 | ;; 26 | ;; **For the impatient** - to see the CoD summarization output, jump to the end of this notebook for the generated summaries. 27 | ;; 28 | ;; ## Implementation 29 | ;; 30 | ;; Let's take a Wikipedia article on the [2023 Herat earthquakes](https://en.wikipedia.org/wiki/2023_Herat_earthquakes) and generate a summary of it using the CoD technique. 31 | 32 | (def article (slurp "notebook/papers/2023_Herat_earthquakes.txt")) 33 | 34 | ;; Prompt taken from the paper. Note its structure: 35 | ;; - Instructing to proceed in iterations 36 | ;; - Each iteration asks to produce a denser summary based on missing entities 37 | ;; - Guidelines instructing to keep the summary length constant and preserve already conveyed information 38 | ;; - Output shape to include missing entities and summary 39 | ;; 40 | ;; *Bosquet* allows adding some extra configuration to the prompt. 
41 | ;; - `LENGTH-IN-SENTENCES` and `LENGTH-IN-WORDS` allow controlling the length of the summary; *Selmer* templating allows adding defaults for those values. 42 | ;; - `FORMAT` to control the output format, defaults to `JSON` (more on that later) 43 | 44 | (def cod-prompt 45 | [[:user 46 | "Article: {{ ARTICLE }} 47 | 48 | You will generate increasingly concise, entity-dense summaries of the above article. 49 | 50 | Repeat the following 2 steps 5 times. 51 | 52 | Step 1. Identify 1-3 informative entities (\";\" delimited) from the article which are missing from the previously generated summary. 53 | 54 | Step 2. Write a new, denser summary of identical length which covers every entity and detail from the previous summary plus the missing entities. 55 | 56 | A missing entity is: 57 | - relevant to the main story, 58 | - specific yet concise (5 words or fewer), 59 | - novel (not in the previous summary), 60 | - faithful (present in the article), 61 | - anywhere (can be located anywhere in the article). 62 | 63 | Guidelines: 64 | 65 | - The first summary should be long ({{LENGTH-IN-SENTENCES|default:3-4}} sentences, ~{{LENGTH-IN-WORDS|default:80}} words) yet highly non-specific, containing little information beyond the entities marked as missing. 66 | Use overly verbose language and fillers (e.g., \"this article discusses\") to reach ~{{LENGTH-IN-WORDS|default:80}} words. 67 | - Make every word count: rewrite the previous summary to improve flow and make space for additional entities. 68 | - Make space with fusion, compression, and removal of uninformative phrases like \"the article discusses\". 69 | - The summaries should become highly dense and concise yet self-contained, i.e., easily understood without the article. 70 | - Missing entities can appear anywhere in the new summary. 71 | - Never drop entities from the previous summary. If space cannot be made, add fewer new entities. 72 | 73 | Remember, use the exact same number of words for each summary. Answer in {{FORMAT|default:JSON}}. The {{FORMAT|default:JSON}} should be a list (length 5) of dictionaries whose keys 74 | are \"Missing-Entities\" and \"Denser-Summary\". 75 | 76 | {{sum-gen}}"] 77 | [:assistant (g/llm :gpt-4 78 | wkk/var-name :sum-gen 79 | wkk/output-format :json)]]) 80 | 81 | ;; 82 | ;; With that set, a call to generation (see the *Getting Started* and *Configuration* notebooks for more details on how generation works) can be made. 83 | ;; Note the `output-format` and `FORMAT` parameters: 84 | ;; - the `FORMAT` will be used to fill in the string value in the template; 85 | ;; - the `output-format` is a *Bosquet* parameter that will initiate result postprocessing and coerce the result into the specified format. Currently supported formats: EDN, JSON, and plain text. 86 | ;; 87 | 88 | ^{:nextjournal.clerk/visibility {:result :hide}} 89 | (def result (g/generate cod-prompt {:ARTICLE article :FORMAT "JSON"})) 90 | 91 | ;; CoD - as instructed - produces a list of 5 summaries; each summary is a map with `Missing-Entities` and `Denser-Summary` keys. The authors of the paper did a human evaluation 92 | ;; of the produced summaries and found that humans usually prefer the 2nd-3rd summaries.
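;; Since the parsed result keeps all five iterations, one of the middle, denser
;; summaries can be picked out directly. A minimal sketch (it assumes the model
;; returned the requested list of maps keyed by the "Denser-Summary" string, as
;; instructed above):

(comment
  ;; the third iteration is often a good density/readability trade-off
  (-> result
      (get-in [g/completions :sum-gen])
      (nth 2)
      (get "Denser-Summary")))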
93 | 94 | ^{:nextjournal.clerk/visibility {:code :hide}} 95 | (clerk/html 96 | (vec 97 | (cons 98 | :div.font-mono 99 | (map-indexed 100 | (fn [idx {:strs [Missing-Entities Denser-Summary]}] 101 | [:div.block.p-6.bg-white.border.border-gray-200.rounded-lg.shadow.hover:bg-gray-100.dark:bg-gray-800.dark:border-gray-700.dark:hover:bg-gray-700.grid.grid-cols-1.gap-3 102 | [:div.flex 103 | [:div.flex-none.w-32.mr-4 [:em "Step:"]] 104 | [:div (inc idx)]] 105 | [:div.flex 106 | [:div.flex-none.w-32.mr-4 [:em "Missing Entities:"]] 107 | [:div Missing-Entities]] 108 | [:div.flex 109 | [:div.flex-none.w-32.mr-4 [:em "Denser Summary:"]] 110 | [:div Denser-Summary]]]) 111 | (get-in result [g/completions :sum-gen]))))) 112 | -------------------------------------------------------------------------------- /src/bosquet/eval/qna_generator.clj: -------------------------------------------------------------------------------- 1 | (ns bosquet.eval.qna-generator 2 | (:require 3 | [bosquet.llm.generator :as gen] 4 | [bosquet.llm.openai-tokens :as otok] 5 | [bosquet.nlp.splitter :as splitter] 6 | [bosquet.read.document :as document] 7 | [bosquet.utils :as u] 8 | [bosquet.wkk :as wkk] 9 | [clojure.core :as c] 10 | [clojure.edn :as edn] 11 | [clojure.java.io :as io] 12 | [taoensso.timbre :as timbre])) 13 | 14 | ;; Some of the details are borrowed from 15 | ;; https://github.com/run-llama/llama_index/blob/29ef306ae0536de44840ca5acfdf93d84b9a560c/llama_index/evaluation/dataset_generation.py 16 | 17 | (def query-count :eval/query-count) 18 | (def max-chunks :eval/max-chunks) 19 | 20 | (def context-prompt-block 21 | (u/join-lines 22 | "CONTEXT INFORMATION is below:" 23 | "~~~~~~" 24 | "{{context}}" 25 | "~~~~~~")) 26 | 27 | (def format-constraints 28 | #_"Write your response as numbered list, one item per line." 29 | (u/join-lines 30 | "Write your response in JSON. Resulting JSON is a vector containing generated items one vector element per generated item." 31 | "Example JSON output: [\"Item 1\", \"Item 2\", \"Item 3\", \"Item 4\"]")) 32 | 33 | (defn question-building-prompts 34 | [question-count] 35 | {:role ["You are an excelent Teacher who understands subject material perfectly." 36 | "Your ability to analyze text is astonishing. Based on that your task is to setup" 37 | "{{question-count}} questions for the upcoming student examination." 38 | "The questions should be diverse and cover interesting and important topics and facts across the document." 39 | "Restrict the {{question-count}} questions you are writing to the CONTEXT INFORMATION provided."] 40 | :format "Write your response as numbered list, one question per line." 41 | :question-generation context-prompt-block 42 | :qna ["{{role}}" 43 | "{{question-generation}}" 44 | "{{format}}" 45 | "" 46 | "QUESTIONS:" 47 | "{{questions}}"] 48 | :questions (gen/llm :gpt-4 49 | wkk/output-format :list 50 | wkk/model-parameters {:max-tokens (* question-count 100)})}) 51 | 52 | (defn answering-prompt 53 | [question-count] 54 | {:questions [context-prompt-block 55 | "Given the CONTEXT INFORMATION and using zero prior knowledge, answer the following QUESTIONS." 56 | "QUESTIONS" 57 | "{% for q in queries %}" 58 | "* {{q}}{% endfor %}" 59 | "" 60 | "Answer questions in exact same order as they are listed in QUESTIONS. Answer exactly the same list of questions as provided." 61 | format-constraints 62 | "ANSWERS:"] 63 | :answers (gen/llm :gpt-4 64 | wkk/output-format :json 65 | wkk/model-parameters {:max-tokens (* question-count 400)})}) 66 | 67 | (defn- query-response-valid? 
68 | "Check the validity of questions and responses. 69 | 70 | A naive check see if: 71 | * the lenght of question collection is the same as answers 72 | * collections are min lenght strings as per schema" 73 | [{:keys [queries responses context]}] 74 | (if (= (count responses) (count queries)) 75 | true 76 | (do 77 | (timbre/warnf 78 | "Query (count:%s) / Response (count: %s) is invalid. Context: '%s'" 79 | (count queries) (count responses) (u/safe-subs context 0 200)) 80 | false))) 81 | 82 | (defn generate-qna-dataset 83 | "Generate a QnA dataset. First a `document` will be loaded and split into sentence chunks. 84 | The size of the chunk is a function of how many questions we are generating per chunk. 85 | 86 | Passed in options control the scope of the QnA generation process: 87 | - `query-count`: how many queries per chunk to generate 88 | - `max-chunks`: how many chunks to take, nil will take them all" 89 | [{q-count query-count chunk-count max-chunks} document] 90 | (let [n-sentence 35 91 | chunks (splitter/chunk-text 92 | {splitter/chunk-size (* q-count n-sentence) 93 | splitter/split-unit splitter/sentence} 94 | document) 95 | model :gpt-4 96 | xf (comp 97 | (map 98 | (fn [chunk] 99 | (timbre/debugf "QnA for chunk with token count - %s" (otok/token-count chunk model)) 100 | (let [{:keys [questions]} (gen/generate 101 | (question-building-prompts q-count) 102 | {:question-count q-count :context chunk}) 103 | resp (gen/generate 104 | (answering-prompt q-count) 105 | {:queries questions :context chunk})] 106 | {:queries questions 107 | :responses resp 108 | :context chunk}))) 109 | ;; TODO instead of filtering out - retry 110 | (filter query-response-valid?))] 111 | (into [] xf 112 | (if chunk-count 113 | (take chunk-count chunks) 114 | chunks)))) 115 | 116 | (defn qna->eval-dataset 117 | "Convert QnA data to a dataset format - a list of question to answer tuples" 118 | [ds-id ds-data] 119 | {:dataset ds-id 120 | :eval (vec (mapcat 121 | (fn [{:keys [queries responses]}] (map vector queries responses)) 122 | ds-data))}) 123 | 124 | (defn save-dataset 125 | [ds-file ds] 126 | (io/make-parents ds-file) 127 | (spit ds-file (u/pp-str ds))) 128 | 129 | (defn document->dataset 130 | "Given a `document-file` create a QnA dataset. 
Save it to 131 | `dataset-file`" 132 | [opts document-file dataset-file] 133 | (->> document-file 134 | document/parse 135 | :text 136 | (generate-qna-dataset opts) 137 | (qna->eval-dataset document-file) 138 | (save-dataset dataset-file))) 139 | 140 | (defn load-qna-dataset 141 | "Load eval tuples from file (created via `save-dataset`)" 142 | [file-name] 143 | (-> file-name slurp edn/read-string :eval)) 144 | 145 | (comment 146 | (document->dataset {query-count 4} "data/llama2.pdf" "data/llama2-eval.edn") 147 | 148 | (load-qna-dataset "data/llama2-eval.edn") 149 | 150 | (def text (:text (document/parse "data/llama2.pdf"))) 151 | (def qna (generate-qna-dataset {query-count 2 max-chunks 2} text)) 152 | (tap> qna) 153 | 154 | (gen/generate 155 | (question-building-prompts 3) 156 | {:question-count 3 157 | :context (second (splitter/text-splitter 158 | {:chunk-size 25 :splitter splitter/sentence-splitter} 159 | text))}) 160 | #__) 161 | -------------------------------------------------------------------------------- /test/bosquet/llm/generator_test.clj: -------------------------------------------------------------------------------- 1 | (ns bosquet.llm.generator-test 2 | (:require 3 | [matcher-combinators.test] 4 | [bosquet.db.cache :as cache] 5 | [bosquet.env :as env] 6 | [bosquet.llm.generator :as gen] 7 | [bosquet.llm.wkk :as wkk] 8 | [bosquet.utils :as u] 9 | [clojure.test :refer [deftest is]])) 10 | 11 | (def echo-service-chat-last 12 | "Fake generation. Take last message and repeat it as generation output" 13 | (fn [_system {msg :messages}] 14 | {wkk/content 15 | {:role :assistant :content (-> msg last :content)}})) 16 | 17 | (def echo-service-chat-first 18 | "Fake generation. Take first message and repeat it as generation output" 19 | (fn [_system {msg :messages}] 20 | {wkk/content 21 | {:role :assistant :content (-> msg first :content)}})) 22 | 23 | (deftest chat-generation 24 | (with-redefs [env/config {:service-last {:chat-fn echo-service-chat-last} 25 | :service-first {:chat-fn echo-service-chat-first}}] 26 | (let [{:bosquet/keys [conversation completions usage time]} 27 | (gen/generate 28 | [[:system "You are a brilliant writer."] 29 | [:user ["Write a synopsis for the play:" 30 | "Title: {{title}}"]] 31 | [:assistant (gen/llm :service-first wkk/var-name :synopsis)] 32 | [:user "Now write a critique of the above synopsis:"] 33 | [:assistant (gen/llm :service-last wkk/var-name :critique)]] 34 | {:title "Mr. O"})] 35 | (is (number? time)) 36 | (is (= [[:system "You are a brilliant writer."] 37 | [:user (u/join-lines 38 | "Write a synopsis for the play:" 39 | "Title: Mr. O")] 40 | [:assistant "You are a brilliant writer."] 41 | [:user "Now write a critique of the above synopsis:"] 42 | [:assistant "Now write a critique of the above synopsis:"]] 43 | conversation)) 44 | (is (= {:synopsis "You are a brilliant writer." 45 | :critique "Now write a critique of the above synopsis:"} 46 | completions)) 47 | (is (= {:synopsis nil 48 | :critique nil 49 | :bosquet/total {:prompt 0 :completion 0 :total 0}} 50 | usage))))) 51 | 52 | (deftest map-generation 53 | (with-redefs [env/config {:service-const 54 | {:chat-fn (fn [_ _] 55 | {wkk/content {:content "!!!" :role :assistant} 56 | wkk/usage {:prompt 1 :completion 3 :total 4}})}}] 57 | (let [{:bosquet/keys [completions usage]} 58 | (gen/generate 59 | {:question-answer "Question: {{question}} Answer: {{answer}}" 60 | :answer (gen/llm :service-const) 61 | :self-eval ["{{question-answer}}" 62 | "Is this a correct answer?" 
63 | "{{test}}"] 64 | :test (gen/llm :service-const)} 65 | {:question "What is the distance from Moon to Io?"})] 66 | 67 | (is (= {:question-answer "Question: What is the distance from Moon to Io? Answer: !!!" 68 | :question "What is the distance from Moon to Io?" 69 | :answer "!!!" 70 | :self-eval (u/join-lines 71 | "Question: What is the distance from Moon to Io? Answer: !!!" 72 | "Is this a correct answer?" 73 | "!!!") 74 | :test "!!!"} 75 | completions)) 76 | (is (= {:answer {:prompt 1 :completion 3 :total 4} 77 | :test {:prompt 1 :completion 3 :total 4} 78 | :bosquet/total {:prompt 2 :completion 6 :total 8}} 79 | usage))))) 80 | 81 | (deftest fail-generation 82 | (is (match? 83 | {gen/completions {:in "How are you? {{out}}" :out nil} 84 | gen/usage {:out nil 85 | :bosquet/total {:prompt 0 :completion 0 :total 0}}} 86 | (gen/generate 87 | {:in "How are you? {{out}}" 88 | :out (gen/llm :non-existing-service)} 89 | {})))) 90 | 91 | (deftest appending-gen-instruction 92 | (is (= {gen/default-template-prompt "What is the distance from Moon to Io? {{bosquet..template/completion}}" 93 | gen/default-template-completion (env/default-service)} 94 | (gen/append-generation-instruction 95 | "What is the distance from Moon to Io?")))) 96 | 97 | (deftest chache-usage 98 | (let [call-counter (atom 0) 99 | cached-props (atom []) 100 | question "What is the distance from Moon to Io?" 101 | env-config {:service-const 102 | {:chat-fn (fn [_ props] 103 | (swap! cached-props conj props) 104 | (swap! call-counter inc) {})}} 105 | generate (fn [cache q] 106 | (gen/generate 107 | {:qna "Question: {{q}} Answer: {{a}}" 108 | :a (gen/llm :service-const wkk/cache cache)} 109 | {:q q}))] 110 | (with-redefs [env/config env-config] 111 | ;; cache is off 112 | (generate false question) 113 | (is (= 1 @call-counter)) 114 | (generate false question) 115 | (is (= 2 @call-counter)) 116 | ;; cache is on 117 | (generate true question) 118 | (is (= 3 @call-counter)) 119 | (generate true question) 120 | (is (= 3 @call-counter)) 121 | (generate true "What is the distance between X and Y?") 122 | (is (= 4 @call-counter)) 123 | ;; clear cache 124 | (doseq [p @cached-props] 125 | (cache/evict p))))) 126 | 127 | (deftest find-var-references 128 | (is (= [:y :z] (gen/find-refering-templates :x {:x "aaa" :y "{{x}}" :z "{{x}} {{y}}"}))) 129 | (is (= [:y :z] (gen/find-refering-templates :n/x {:x "aaa" :y "{{n/x}}" :z "{{n/x}} {{y}}"}))) 130 | (is (= [:n/y :n/z] (gen/find-refering-templates :x {:x "aaa" :n/y "{{x}}" :n/z "{{x}} {{y}}"}))) 131 | (is (= [] (gen/find-refering-templates :x {:x "aaa"})))) 132 | 133 | (deftest ->chatml-conversion 134 | (is (= [{:role :user :content "Hi!"}] (gen/->chatml [[:user "Hi!"]]))) 135 | (is (= [{:role :user :content "{\"lon\":54.1,\"lat\":50.3}"}] 136 | (gen/->chatml [[:user {:lon 54.1 :lat 50.3}]])))) 137 | 138 | (deftest llm-spec-construction 139 | (is (= {wkk/service :openai} (gen/llm :openai))) 140 | (is (= {wkk/model-params {:model :command} wkk/service :cohere} 141 | (gen/llm :command)))) 142 | 143 | (deftest slot-filling 144 | (is (= "3 + 1 = 4" 145 | (get-in 146 | (gen/generate 147 | {:z "{{y}} + {{x}} = {{a}}" :a 4} {:x 1 :y 3}) 148 | [gen/completions :z])))) 149 | 150 | (deftest run-node-function-test 151 | (is (= 3 (gen/run-node-function 152 | {wkk/fun-impl (fn [x y] (+ x y)) 153 | wkk/fun-args '[x y]} 154 | {:x 1 :y 2})))) 155 | -------------------------------------------------------------------------------- /resources/env.edn: 
-------------------------------------------------------------------------------- 1 | #mmerge 2 | [{;; Configuration for the LLM services. See '#include "config.edn"' at the 3 | ;; bottom for secrets and other local props. Whatever is declared in `config.edn` 4 | ;; will override the values declared here. 5 | 6 | ;; ###################################################################### 7 | ;; LLM services 8 | ;; ###################################################################### 9 | 10 | :openai {:api-endpoint #or [#env "OPENAI_API_ENDPOINT" "https://api.openai.com/v1"] 11 | :model-params {:model :gpt-3.5-turbo} 12 | :complete-fn bosquet.llm.openai/complete 13 | :chat-fn bosquet.llm.openai/chat 14 | :impl :openai 15 | ;; A list of model names supported by this service. It is an 16 | ;; optional data point that allows a shortcut when defining LLM 17 | ;; calls with (generator/llm) function. Instead of 18 | ;; `(llm :openai :model-params {:model :gpt-3.5})` 19 | ;; a shorthand of `(llm :gpt-3.5)` will work 20 | :model-names 21 | #{:babbage-002 :davinci-002 22 | :gpt-3.5 :gpt-3.5-turbo 23 | :gpt-3.5-turbo-0125 :gpt-3.5-turbo-0301 24 | :gpt-3.5-turbo-0613 :gpt-3.5-turbo-1106 25 | :gpt-3.5-turbo-16k-0613 :gpt-3.5-turbo-instruct 26 | :gpt-4 :gpt-4-0125-preview :gpt-4-1106-preview 27 | :gpt-4-1106-vision-preview :gpt-4-32k 28 | :gpt-4o :gpt-4o-2024-05-13 29 | :gpt-5-nano :gpt-5 :gpt-5-mini 30 | ;; embeddings 31 | :text-embedding-3-large 32 | :text-embedding-3-small 33 | :text-embedding-ada-002}} 34 | 35 | :localai {:api-endpoint #or [#env "LOCALAI_API_ENDPOINT" "http://0.0.0.0:8080/v1"] 36 | :model-params {:model :phi-4} 37 | :complete-fn bosquet.llm.localai/complete 38 | :chat-fn bosquet.llm.localai/chat 39 | :impl :openai 40 | :model-names #{:phi-4}} 41 | 42 | :openai-azure {:api-endpoint #env "AZURE_OPENAI_API_ENDPOINT" 43 | :model-params {:model :gpt-3.5-turbo} 44 | :impl :azure} 45 | :ollama {:api-endpoint #or [#env "OLLAMA_API_ENDPOINT" "http://localhost:11434/api"] 46 | :complete-fn bosquet.llm.ollama/complete 47 | :chat-fn bosquet.llm.ollama/chat 48 | :embed-fn bosquet.llm.ollama/create-embedding} 49 | :lmstudio {:api-endpoint #or [#env "LMSTUDIO_API_ENDPOINT" "http://localhost:1234/v1"] 50 | :complete-fn bosquet.llm.oai-shaped-llm/complete 51 | :chat-fn bosquet.llm.oai-shaped-llm/chat 52 | :model-params {:max_tokens -1 53 | :temperature 0 54 | :stream false}} 55 | :mistral {:api-endpoint #or [#env "MISTRAL_API_ENDPOINT" "https://api.mistral.ai/v1"] 56 | :model-params {:model :mistral-small-latest 57 | :temperature 0} 58 | :complete-fn bosquet.llm.oai-shaped-llm/complete 59 | :chat-fn bosquet.llm.oai-shaped-llm/chat 60 | :model-names 61 | #{:open-mistral-7b :open-mixtral-8x7b 62 | :mistral-small-latest :mistral-medium-latest :mistral-large-latest 63 | :mistral-small :mistral-medium :mistral-large 64 | :open-mistral-nemo 65 | :mistral-embed 66 | :open-codestral-mamba :codestral-latest}} 67 | :claude {:api-endpoint #or [#env "CLAUDE_API_ENDPOINT" "https://api.anthropic.com/v1"] 68 | :api-key [#env "ANTHROPIC_API_KEY"] 69 | :model-params {:model :claude-3-5-haiku-latest 70 | ;; max tokens is required parameter when 71 | ;; making a call to Claude, use this as default 72 | :max_tokens 500 73 | :temperature 0.6} 74 | :chat-fn bosquet.llm.claude/messages 75 | :model-names 76 | #{:claude-3-opus-latest :claude-3-5-haiku-latest :claude-3-7-sonnet-latest 77 | :claude-3-5-sonnet-latest 78 | :claude-3-7-sonnet-20250219 79 | :claude-3-5-haiku-20241022 :claude-3-5-sonnet-20241022 80 | 
:claude-3-5-sonnet-20240620 :claude-3-opus-20240229 :claude-3-sonnet-20240229 81 | :claude-3-haiku-20240307}} 82 | :cohere {:model-params {:model :command 83 | :temperature 0} 84 | :complete-fn bosquet.llm.cohere/complete 85 | :chat-fn bosquet.llm.cohere/chat 86 | :model-names 87 | #{:command :command-light :command-light-nightly 88 | :command-r-plus :command-r}} 89 | :pplx {:api-endpoint #or [#env "PPLX_API_ENDPOINT" "https://api.perplexity.ai"] 90 | :api-key #env "PPLX_API_KEY" 91 | :model-params {:model :sonar} 92 | :complete-fn bosquet.llm.oai-shaped-llm/complete 93 | :chat-fn bosquet.llm.oai-shaped-llm/chat 94 | :model-names 95 | #{:llama-3.1-sonar-small-128k-online ;; will probably deprecate soon 96 | :llama-3.1-sonar-large-128k-online ;; will probably deprecate soon 97 | :llama-3.1-sonar-huge-128k-online ;; will probably deprecate soon 98 | :sonar-deep-research 99 | :sonar-reasoning-pro 100 | :sonar-reasoning 101 | :sonar 102 | :sonar-pro 103 | :r1-1776}} 104 | :groq {:api-endpoint #or [#env "GROQ_API_ENDPOINT" "https://api.groq.com/openai/v1"] 105 | :api-key #env "GROQ_API_KEY" 106 | :model-params {:model :llama-3.3-70b-versatile} 107 | :complete-fn bosquet.llm.oai-shaped-llm/complete 108 | :chat-fn bosquet.llm.oai-shaped-llm/chat 109 | :model-names 110 | #{:deepseek-r1-distill-llama-70b 111 | :deepseek-r1-distill-qwen-32b 112 | :gemma2-9b-it 113 | :llama-3.1-8b-instant 114 | :llama-3.2-1b-preview 115 | :llama-3.2-3b-preview 116 | :llama-3.3-70b-specdec 117 | :llama-3.3-70b-versatile 118 | :meta-llama/llama-4-scout-17b-16e-instruct 119 | :meta-llama/llama-4-maverick-17b-128e-instruct 120 | :llama-guard-3-8b 121 | :llama3-8b-8192 122 | :llama3-70b-8192 123 | :mistral-saba-24b 124 | :qwen-2.5-32b 125 | :qwen-2.5-coder-32b 126 | :qwen-qwq-32b 127 | :allam-2-7b}} 128 | 129 | ;; ######## 130 | ;; DB 131 | ;; ######## 132 | 133 | :qdrant {:api-endpoint #or [#env "QDRANT_API_ENDPOINT" "http://localhost:6333"] 134 | :on-disk true 135 | :size 1536 136 | :distance :Dot}} 137 | 138 | ;; config.edn contains local settings for the LLM services, tools, 139 | ;; and other components, values in this file will override the above 140 | #include-config "config.edn" 141 | #include-config "secrets.edn"] 142 | -------------------------------------------------------------------------------- /notebook/papers/llms_as_optimizers.clj: -------------------------------------------------------------------------------- 1 | (ns papers.llms-as-optimizers 2 | (:require 3 | [bosquet.env :as env] 4 | [bosquet.eval.evaluator :as eval] 5 | [bosquet.eval.qna-generator :as qna] 6 | [bosquet.llm.generator :as gen] 7 | [bosquet.read.document :as document] 8 | [bosquet.utils :as u] 9 | [bosquet.wkk :as wkk]) 10 | (:import [bosquet.db.qdrant Qdrant] 11 | [bosquet.memory.long_term_memory LongTermMemory])) 12 | 13 | ;; https://arxiv.org/pdf/2309.03409.pdf 14 | ;; 15 | ;; Optimization is ubiquitous. While derivative-based algorithms have been powerful tools for 16 | ;; various problems, the absence of gradient imposes challenges on many real-world applications. 17 | ;; In this work, we propose Optimization by PROmpting (OPRO), a simple and effective approach 18 | ;; to leverage large language models (LLMs) as optimizers, where the optimization task is 19 | ;; described in natural language. 
In each optimization step, the LLM generates new solutions from 20 | ;; the prompt that contains previously generated solutions with their values, then the new 21 | ;; solutions are evaluated and added to the prompt for the next optimization step. 22 | 23 | 24 | ;; 25 | ;; ## Setup 26 | ;; 27 | 28 | (def mem-opts (LongTermMemory. 29 | (Qdrant. {:collection-name "llama2-qna-eval"}) 30 | (env/val :ollama))) 31 | 32 | 33 | ;; Create evaluation QnA dataset 34 | (comment 35 | (qna/document->dataset {qna/query-count 4} 36 | "data/llama2.pdf" "data/llama2-eval.edn") 37 | #__) 38 | 39 | 40 | ;; Commit document contents to memory; it will be chunked, chunks turned into embeddings, 41 | ;; and saved to Qdrant 42 | ;; 43 | ;; Results should be available in 44 | ;; http://localhost:6333/dashboard#/collections/llama2-qna-eval 45 | (comment 46 | (def text (:text (document/parse "data/llama2.pdf"))) 47 | (eval/store-knowledge mem-opts text)) 48 | 49 | 50 | ;; Questions and answers golden dataset. Will be used to evaluate against and optimize the prompts 51 | (def qna-goldenset 52 | (qna/load-qna-dataset "data/llama2-eval.edn")) 53 | 54 | ;; 55 | ;; ## Prompts 56 | ;; 57 | ;; 58 | ;; OPRO Meta-prompt 59 | ;; 60 | (def opro-prompt 61 | (u/join-lines 62 | "Your task is to generate the instruction <INS>. Below are some previous instructions with their scores." 63 | "The score ranges from 1 to 5." 64 | "" 65 | "{% for i in instruction-score-pairs %}" 66 | "Instruction (<INS>): {{i.instruction}}" 67 | "Score: {{i.avg-score}}" 68 | "{% endfor %}" 69 | "" 70 | "Below we show the task. The <INS> tag is prepended to the below prompt template, e.g. as follows:" 71 | "" 72 | "```" 73 | "<INS>" 74 | "{{prompt-template}}" 75 | "```" 76 | "" 77 | "The prompt template contains template variables. Given an input set of template variables, the formatted prompt is then given to an LLM to get an output." 78 | "" 79 | "Some examples of template variable inputs and expected outputs are given below to illustrate the task. **NOTE**: These do NOT represent the" 80 | "entire evaluation dataset." 81 | "" 82 | "{% for q,a in qna-pairs %}" 83 | "Question: {{q}}" 84 | "Answer: {{a}}" 85 | "{% endfor %}" 86 | "" 87 | #_"We run every input in an evaluation dataset through an LLM. If the LLM-generated output doesn't match the expected output, we mark it as wrong (score 0)." 88 | "We run every input in an evaluation dataset through an LLM. If the LLM-generated output doesn't match the expected output, we mark it as wrong (score 1)." 89 | "An ideal answer has a score of 5, with the range in between indicating various matching levels." 90 | #_"The final 'score' for an instruction is the average of scores across an evaluation dataset." 91 | "Write your new instruction (<INS>) that is different from the old ones and has a score as high as possible." 92 | "Be very concise in your instruction. At the same time, try to write a more generically applicable instruction." 93 | "" 94 | "" 95 | "{% gen var-name=instruction %}")) 96 | 97 | (defn optimization-step-prompt 98 | [instruction] 99 | {:instruction instruction 100 | :prompt-template (u/join-lines 101 | "{{instruction}}" 102 | "Context information is below."
103 | "~~~~~" 104 | "{{context}}" 105 | "~~~~~" 106 | "Query: {{query}}" 107 | "Answer:") 108 | :generation (u/join-lines 109 | "{{prompt-template}}" 110 | "{% gen var-name=answer %}")}) 111 | 112 | 113 | (defn evaluate-instruction 114 | [step-prompt eval-qna] 115 | (map (fn [[eval-question eval-answer]] 116 | (let [relevant-memories (eval/query mem-opts eval-question) 117 | {:keys [answer]} (gen/generate 118 | step-prompt 119 | {:query eval-question 120 | :context relevant-memories} 121 | {:score wkk/gpt3.5-turbo-with-cache})] 122 | {:score (eval/evaluate-answer eval-question eval-answer answer) 123 | :step-prompt step-prompt 124 | :eval-question eval-question 125 | :eval-answer eval-answer 126 | :test-answer answer})) 127 | eval-qna)) 128 | 129 | (defn average-instruction-score 130 | [evaluations] 131 | (/ (reduce + (map :score evaluations)) 132 | (count evaluations))) 133 | 134 | (defn prompt-optimizer 135 | [start-prompt eval-set steps] 136 | (loop [n steps 137 | step-prompt start-prompt 138 | instructions []] 139 | (if (zero? n) 140 | instructions 141 | (let [step-prompt-template (optimization-step-prompt step-prompt) 142 | optimization (gen/generate 143 | opro-prompt 144 | {:instruction-score-pairs instructions 145 | :prompt-template (:prompt-template step-prompt-template) 146 | :qna-pairs (take 4 (shuffle qna-goldenset))} 147 | {:score wkk/gpt3.5-turbo-with-cache}) 148 | eval (evaluate-instruction step-prompt-template eval-set)] 149 | (tap> {'optimization optimization 150 | 'eval eval}) 151 | (recur 152 | (dec n) 153 | (:instruction optimization) 154 | (conj instructions {:instruction (:instruction optimization) 155 | :avg-score (average-instruction-score eval) 156 | :eval eval})))))) 157 | 158 | (tap> 159 | (prompt-optimizer 160 | "Given the context information and not prior knowledge, answer the query." 161 | (take 6 qna-goldenset) 4)) 162 | 163 | 164 | #_(def relevant-memories 165 | (eval/query mem-opts question)) 166 | 167 | #_(def answer-from-context 168 | (:answer 169 | (gen/generate 170 | step-0-prompt 171 | {:query question 172 | :context relevant-memories} 173 | {:score wkk/gpt3.5-turbo-with-cache}))) 174 | 175 | #_(def score 176 | (eval/evaluate-answer 177 | question 178 | "The key contributions of the Llama 2 project include the development and release of pretrained and fine-tuned large language models (LLMs) optimized for dialogue use cases. The Llama 2-Chat models outperform existing open-source chat models on most benchmarks, and based on human evaluations for helpfulness and safety, they may be a suitable substitute for closed-source models." 
179 | answer-from-context)) 180 | 181 | 182 | #_(read/fill-slots 183 | opro-prompt 184 | {:instruction-score-pairs [{:instruction (:instruction step-0-prompt) :score score}] 185 | :prompt-template (:prompt-template step-0-prompt) 186 | :qna-pairs (take 4 qna-goldenset)}) 187 | 188 | 189 | #_(gen/generate 190 | opro-prompt 191 | {:instruction-score-pairs [{:instruction (:instruction step-0-prompt) :score score}] 192 | :prompt-template (:prompt-template step-0-prompt) 193 | :qna-pairs (take 4 (shuffle qna-goldenset))} 194 | {:score wkk/gpt3.5-turbo-with-cache}) 195 | -------------------------------------------------------------------------------- /notebook/text_splitting.clj: -------------------------------------------------------------------------------- 1 | ^{:nextjournal.clerk/visibility {:code :fold}} 2 | (ns text-splitting 3 | {:nextjournal.clerk/toc true} 4 | (:require 5 | [bosquet.llm.generator :as g] 6 | [bosquet.nlp.splitter :as split] 7 | [clojure.string :as string] 8 | [helpers :as h] 9 | [nextjournal.clerk :as clerk])) 10 | 11 | ;; # Text chunking 12 | ;; 13 | ;; Text chunking is the process of breaking a text into parts. It is an essential part of 14 | ;; working with LLMs since they can only process a limited amount of text. Even as LLM context 15 | ;; windows grow, text chunking remains important. LLMs are U-shaped reasoners. They are good 16 | ;; at remembering the beginning and the end of a text but are not great at dealing with content 17 | ;; in the middle. 18 | 19 | ;; Thus text chunking can help to increase the relevancy of LLM-based extraction and text 20 | ;; generation. 21 | 22 | ;; ### Chunking strategies 23 | 24 | ;; Text splitting can be done by using different splitting units. *Bosquet* supports splitting by: 25 | ;; 26 | ;; - characters 27 | ;; - tokens 28 | ;; - sentences 29 | ;; 30 | ;; An additional important splitting feature is the overlap between chunks. This helps to 31 | ;; prevent losing context information at the chunk boundaries. Example text (first paragraph 32 | ;; from *Moby Dick*) to experiment with different chunking approaches 33 | 34 | ^{:nextjournal.clerk/visibility {:result :hide}} 35 | (def text 36 | "Call me Ishmael. Some years ago—never mind how long precisely—having 37 | little or no money in my purse, and nothing particular to interest me 38 | on shore, I thought I would sail about a little and see the watery part 39 | of the world. It is a way I have of driving off the spleen and 40 | regulating the circulation. Whenever I find myself growing grim about 41 | the mouth; whenever it is a damp, drizzly November in my soul; whenever 42 | I find myself involuntarily pausing before coffin warehouses, and 43 | bringing up the rear of every funeral I meet; and especially whenever 44 | my hypos get such an upper hand of me, that it requires a strong moral 45 | principle to prevent me from deliberately stepping into the street, and 46 | methodically knocking people’s hats off—then, I account it high time to 47 | get to sea as soon as I can.") 48 | 49 | ;; 50 | ;; #### Splitting by characters 51 | ;; 52 | ;; Splitting by characters will take the text and chop it at every N-th character. This 53 | ;; strategy is probably best used for text that has a known structure or a regular data form: 54 | ;; tables, CSV content, etc. 55 | ;; 56 | ;; Here, the text will be split at every 140 characters with the 10 characters overlap. 57 | ;; *Note*, how 10 characters from the N-1 chunk are included at the beginning of the N chunk. 
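;; To see the overlap on the `char-chunks` defined below, each chunk's
;; 10-character prefix can be compared with the tail of the preceding chunk.
;; A minimal sketch (it assumes the chunks come back as plain strings, in order):

(comment
  (map (fn [[prev nxt]]
         {:tail-of-prev (subs prev (- (count prev) 10))
          :head-of-next (subs nxt 0 10)})
       (partition 2 1 char-chunks)))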
58 | 59 | (def char-chunks (split/chunk-text 60 | {split/chunk-size 200 61 | split/overlap 10 62 | split/split-unit split/character} 63 | text)) 64 | 65 | 66 | ^{:nextjournal.clerk/visibility {:code :hide}} 67 | (clerk/html 68 | (vec 69 | (cons 70 | :div.font-mono 71 | (map 72 | (fn [chunk] 73 | [:div.block.p-6.bg-white.border.border-gray-200.rounded-lg.shadow.hover:bg-gray-100.dark:bg-gray-800.dark:border-gray-700.dark:hover:bg-gray-700.grid.grid-cols-1.gap-3 74 | [:div.flex 75 | [:div chunk]]]) 76 | char-chunks)))) 77 | 78 | ;; 79 | ;; #### Splitting by sentences 80 | ;; 81 | ;; Sentence splitting relies on OpenNLP models - https://opennlp.apache.org/models.html 82 | ;; 83 | ;; **NOTE:** They need to be downloaded before using this functionality. 84 | ;; 85 | ;; Running `bb lang:sent:en` will download the English sentence splitting model and place it in `lang/en` directory. 86 | ;; 87 | ;; Splitting by sentences will partition the text into chunks of N sentences. This results in 88 | ;; chunks that are natural to reader. It will also prevent cutting the meaning of the sentence 89 | ;; into two chunks. For this reason, the need for overlap parameter is less important when 90 | ;; using this splitting method. However, long sentences might result in overflows of the context 91 | ;; window of the LLM. 92 | ;; 93 | ;; **Note:** that overlap is not specified in the example below - the default of 0 is used. 94 | 95 | (def sentence-chunks (split/chunk-text 96 | {split/chunk-size 3 97 | split/split-unit split/sentence} 98 | text)) 99 | 100 | ^{:nextjournal.clerk/visibility {:code :hide}} 101 | (clerk/html 102 | (vec 103 | (cons 104 | :div.font-mono 105 | (map 106 | (fn [chunk] 107 | [:div.block.p-6.bg-white.border.border-gray-200.rounded-lg.shadow.hover:bg-gray-100.dark:bg-gray-800.dark:border-gray-700.dark:hover:bg-gray-700.grid.grid-cols-1.gap-3 108 | [:div.flex 109 | [:div chunk]]]) 110 | sentence-chunks)))) 111 | 112 | ;; 113 | ;; #### Splitting by tokens 114 | ;; 115 | ;; Splitting by tokens will take the text and chop it every N tokens. This is the most convenient and 116 | ;; safe way to split text for LLMs. With token splitting there is full control of exactly how many 117 | ;; tokens are used in a given split, thus we can be sure to prevent overflows of the context window. 118 | 119 | (def token-chunks (split/chunk-text 120 | {split/chunk-size 50 121 | split/overlap 5 122 | split/split-unit split/token 123 | split/model :gpt-4} 124 | text)) 125 | 126 | ^{:nextjournal.clerk/visibility {:code :hide}} 127 | (clerk/html 128 | (vec 129 | (cons 130 | :div.font-mono 131 | (map 132 | (fn [chunk] 133 | [:div.block.p-6.bg-white.border.border-gray-200.rounded-lg.shadow.hover:bg-gray-100.dark:bg-gray-800.dark:border-gray-700.dark:hover:bg-gray-700.grid.grid-cols-1.gap-3 134 | [:div.flex 135 | [:div chunk]]]) 136 | token-chunks)))) 137 | 138 | ;; ### Generation with chunking 139 | ;; 140 | ;; An example of using text chunking would be to send parts of the longer text to LLM for 141 | ;; processing separately and then aggregating the results. Let's extract the feelings expressed 142 | ;; by the character in Moby Dick's first paragraph (pretending that it is a very long text). 143 | ;; 144 | 145 | (def extraction-prompt 146 | "You are a brillian reader of human emotions. Your ability to analyze text is unparalleled. 147 | Please analyze the text bellow, and provide a list emotions expressed by the character in that text. 148 | 149 | Reply with one or two words name for the empotion. 
Please refrain from further explanations. 150 | If no emotions are expressed, reply with 'no emotions expressed'. Provide your response as 151 | a bullet list. 152 | 153 | TEXT: {{chunk}}") 154 | 155 | (defn analysis 156 | [chunks] 157 | (mapv 158 | #(g/generate extraction-prompt {:chunk %}) 159 | chunks)) 160 | 161 | ;; Below, per-chunker results show how the chunking method can influence the output. The three methods 162 | ;; return quite different extracted emotions. 163 | ;; 164 | ;; #### Character splitter 165 | 166 | (def char-results (analysis char-chunks)) 167 | 168 | ^{:nextjournal.clerk/visibility {:code :hide}} 169 | (h/card-list (mapv clerk/md char-results)) 170 | 171 | ;; #### Sentence splitter 172 | 173 | (def sentence-results (analysis sentence-chunks)) 174 | 175 | ^{:nextjournal.clerk/visibility {:code :hide}} 176 | (h/card-list (mapv clerk/md sentence-results)) 177 | 178 | ;; #### Token splitter 179 | 180 | (def token-results (analysis token-chunks)) 181 | 182 | ^{:nextjournal.clerk/visibility {:code :hide}} 183 | (h/card-list (mapv clerk/md token-results)) 184 | 185 | 186 | ;; #### Summary 187 | ;; 188 | ;; All the chunk results need to be consolidated into a single list of unique emotions. This can 189 | ;; be done with another LLM request that gets all the per-chunk detected emotions and aggregates 190 | ;; them into a single list. 191 | 192 | (def summarization-prompt 193 | {:prompt 194 | "You are provided with a list of expressions of emotions. Please aggregate them into 195 | a single list of summarizing emotions. Omit any duplicates and skip 'no emotions expressed' entries. 196 | Respond with an unnumbered bullet list and nothing else. 197 | 198 | EMOTIONS: {{emotions}} 199 | 200 | {{summary}}" 201 | :summary (g/llm :mistral-medium)}) 202 | 203 | (defn summarize [analysis] 204 | (-> 205 | summarization-prompt 206 | (g/generate {:emotions (string/join ", " analysis)}) 207 | g/completions 208 | :summary)) 209 | 210 | (clerk/table [["Character split" (clerk/md (summarize char-results))] 211 | ["Sentence split" (clerk/md (summarize sentence-results))] 212 | ["Token split" (clerk/md (summarize token-results))]]) 213 | --------------------------------------------------------------------------------