├── .envrc
├── .gitignore
├── images
    ├── question0.png
    ├── question1.png
    ├── question2.png
    ├── question3.png
    └── question4.png
├── default.nix
├── nix
    └── tiktoken.nix
├── GetDX.hs
├── flake.nix
├── ada.cabal
├── overlay.nix
├── LICENSE
├── flake.lock
├── ada.nix
├── OpenAI.hs
├── README.md
├── Slack.hs
└── Main.hs


/.envrc:
--------------------------------------------------------------------------------
1 | use flake
2 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | dist-newstyle/
2 | result
3 | result-*
4 | 


--------------------------------------------------------------------------------
/images/question0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MercuryTechnologies/ada/HEAD/images/question0.png


--------------------------------------------------------------------------------
/images/question1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MercuryTechnologies/ada/HEAD/images/question1.png


--------------------------------------------------------------------------------
/images/question2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MercuryTechnologies/ada/HEAD/images/question2.png


--------------------------------------------------------------------------------
/images/question3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MercuryTechnologies/ada/HEAD/images/question3.png


--------------------------------------------------------------------------------
/images/question4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MercuryTechnologies/ada/HEAD/images/question4.png


--------------------------------------------------------------------------------
/default.nix:
--------------------------------------------------------------------------------
 1 | (import
 2 |   (
 3 |     let lock = builtins.fromJSON (builtins.readFile ./flake.lock); in
 4 |     fetchTarball {
 5 |       url = lock.nodes.flake-compat.locked.url or "https://github.com/edolstra/flake-compat/archive/${lock.nodes.flake-compat.locked.rev}.tar.gz";
 6 |       sha256 = lock.nodes.flake-compat.locked.narHash;
 7 |     }
 8 |   )
 9 |   { src = ./.; }
10 | ).defaultNix
11 | 


--------------------------------------------------------------------------------
/nix/tiktoken.nix:
--------------------------------------------------------------------------------
 1 | { mkDerivation, base, base64, bytestring, containers, deepseq
 2 | , filepath, lib, megaparsec, pcre-light, quickcheck-instances
 3 | , raw-strings-qq, tasty, tasty-bench, tasty-quickcheck
 4 | , tasty-silver, text, unordered-containers
 5 | }:
 6 | mkDerivation {
 7 |   pname = "tiktoken";
 8 |   version = "1.0.3";
 9 |   sha256 = "46d619129e267935711131bb1bbd59a2257898c5a89013259a39cad772f2c343";
10 |   enableSeparateDataOutput = true;
11 |   libraryHaskellDepends = [
12 |     base base64 bytestring containers deepseq filepath megaparsec
13 |     pcre-light raw-strings-qq text unordered-containers
14 |   ];
15 |   testHaskellDepends = [
16 |     base bytestring quickcheck-instances tasty tasty-quickcheck
17 |     tasty-silver text
18 |   ];
19 |   benchmarkHaskellDepends = [
20 |     base bytestring deepseq filepath tasty-bench
21 |   ];
22 |   description = "Haskell implementation of tiktoken";
23 |   license = lib.licenses.bsd3;
24 | }
25 | 


--------------------------------------------------------------------------------
/GetDX.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE DataKinds          #-}
 2 | {-# LANGUAGE DeriveAnyClass     #-}
 3 | {-# LANGUAGE DeriveGeneric      #-}
 4 | {-# LANGUAGE DerivingStrategies #-}
 5 | {-# LANGUAGE TypeOperators      #-}
 6 | 
 7 | module GetDX where
 8 | 
 9 | import Data.Aeson (FromJSON, ToJSON)
10 | import Data.Text (Text)
11 | import GHC.Generics (Generic)
12 | import Servant.API (Header', JSON, Post, ReqBody, Required, Strict, (:>))
13 | 
14 | data EventsTrackRequest = EventsTrackRequest
15 |     { name :: Text
16 |     , timestamp :: Text
17 |     , email :: Text
18 |     } deriving stock (Generic, Show)
19 |       deriving anyclass (ToJSON)
20 | 
21 | data EventsTrackResponse = EventsTrackResponse{ ok :: Bool, error :: Maybe Text }
22 |     deriving stock (Generic, Show)
23 |     deriving anyclass (FromJSON)
24 | 
25 | type API =
26 |        Header' [Required, Strict] "Authorization" Text
27 |     :> "events.track"
28 |     :> ReqBody '[JSON] EventsTrackRequest
29 |     :> Post '[JSON] EventsTrackResponse
30 | 


--------------------------------------------------------------------------------
/flake.nix:
--------------------------------------------------------------------------------
 1 | { inputs = {
 2 |     flake-compat = {
 3 |       url = "github:edolstra/flake-compat/v1.0.0";
 4 | 
 5 |       flake = false;
 6 |     };
 7 | 
 8 |     flake-utils.url = "github:numtide/flake-utils/v1.0.0";
 9 | 
10 |     nixpkgs.url = "github:NixOS/nixpkgs/25.05";
11 |   };
12 | 
13 |   outputs = { flake-utils, nixpkgs, ... }:
14 |     let
15 |       overlay = import ./overlay.nix;
16 | 
17 |     in
18 |       flake-utils.lib.eachDefaultSystem (system:
19 |         let
20 |           config = { };
21 | 
22 |           pkgs =
23 |             import nixpkgs { inherit config system; overlays = [ overlay ]; };
24 | 
25 |         in
26 |           rec {
27 |             packages.default = pkgs.haskellPackages.ada;
28 | 
29 |             apps.default = {
30 |               type = "app";
31 | 
32 |               program = "${pkgs.ada}/bin/ada";
33 |             };
34 | 
35 |             devShells.default =
36 |               pkgs.mkShell {
37 |                 inputsFrom = [ pkgs.haskellPackages.ada.env ];
38 | 
39 |                 packages = [
40 |                   pkgs.cabal-install
41 |                   pkgs.ghcid
42 |                 ];
43 |               };
44 |           }
45 |       ) // {
46 |         overlays.default = overlay;
47 | 
48 |         nixosModules.default = import ./ada.nix;
49 |       };
50 | }
51 | 


--------------------------------------------------------------------------------
/ada.cabal:
--------------------------------------------------------------------------------
 1 | cabal-version: 2.4
 2 | 
 3 | name:           ada
 4 | version:        1.0.0
 5 | build-type:     Simple
 6 | license:        BSD-3-Clause
 7 | license-file:   LICENSE
 8 | copyright:      2023 Mercury Technologies
 9 | bug-reports:    https://github.com/MercuryTechnologies/ada/issues
10 | synopsis:       A helpful AI assistant for Mercury engineers
11 | 
12 | source-repository head
13 |     type: git
14 |     location: https://github.com/MercuryTechnologies/ada.git
15 | 
16 | executable ada
17 |   main-is: Main.hs
18 |   other-modules: Slack, OpenAI, GetDX
19 |   build-depends:
20 |       base >=4.7 && <5
21 |     , aeson
22 |     , base16
23 |     , bytestring
24 |     , cheapskate
25 |     , containers
26 |     , cryptohash-sha256
27 |     , directory
28 |     , http-api-data
29 |     , http-client
30 |     , http-client-tls
31 |     , http-types
32 |     , kdt
33 |     , logging
34 |     , mtl
35 |     , optparse-applicative
36 |     , pretty-show
37 |     , repline
38 |     , safe-exceptions
39 |     , serialise
40 |     , servant
41 |     , servant-client
42 |     , servant-server
43 |     , string-interpolate
44 |     , text
45 |     , tiktoken
46 |     , time
47 |     , transformers
48 |     , vector
49 |     , vector-split
50 |     , wai
51 |     , wai-extra
52 |     , warp
53 |     , wss-client
54 |   default-language: Haskell2010
55 |   ghc-options: -Wall
56 | 


--------------------------------------------------------------------------------
/overlay.nix:
--------------------------------------------------------------------------------
 1 | final: prev: {
 2 |   ada =
 3 |     final.haskell.lib.justStaticExecutables
 4 |       final.haskellPackages.ada;
 5 | 
 6 |   haskellPackages = prev.haskellPackages.override (old: {
 7 |     overrides =
 8 |       final.lib.fold
 9 |         final.lib.composeExtensions
10 |         (old.overrides or (_: _: { }))
11 |         [ (final.haskell.lib.packageSourceOverrides {
12 |             ada = ./.;
13 | 
14 |             base16 = "1.0";
15 | 
16 |             base64 = "1.0";
17 | 
18 |           })
19 |           (final.haskell.lib.packagesFromDirectory {
20 |             directory = ./nix;
21 |           })
22 |           (hfinal: hprev: {
23 |             cheapskate =
24 |               final.haskell.lib.doJailbreak
25 |                 (final.haskell.lib.unmarkBroken hprev.cheapskate);
26 | 
27 |             skews =
28 |               final.haskell.lib.dontCheck
29 |                 (final.haskell.lib.unmarkBroken hprev.skews);
30 | 
31 |             kdt =
32 |               final.haskell.lib.dontCheck
33 |                 (final.haskell.lib.unmarkBroken hprev.kdt);
34 | 
35 |             tiktoken = final.haskell.lib.doJailbreak hprev.tiktoken;
36 | 
37 |             wss-client =
38 |               prev.lib.pipe hprev.wss-client [
39 |                 final.haskell.lib.doJailbreak
40 |                 final.haskell.lib.dontCheck
41 |                 final.haskell.lib.unmarkBroken
42 |               ];
43 |           })
44 |         ];
45 |   });
46 | }
47 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | BSD 3-Clause License
 2 | 
 3 | Copyright (c) 2023, Mercury Technologies
 4 | 
 5 | Redistribution and use in source and binary forms, with or without
 6 | modification, are permitted provided that the following conditions are met:
 7 | 
 8 | 1. Redistributions of source code must retain the above copyright notice, this
 9 |    list of conditions and the following disclaimer.
10 | 
11 | 2. Redistributions in binary form must reproduce the above copyright notice,
12 |    this list of conditions and the following disclaimer in the documentation
13 |    and/or other materials provided with the distribution.
14 | 
15 | 3. Neither the name of the copyright holder nor the names of its
16 |    contributors may be used to endorse or promote products derived from
17 |    this software without specific prior written permission.
18 | 
19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 | 


--------------------------------------------------------------------------------
/flake.lock:
--------------------------------------------------------------------------------
 1 | {
 2 |   "nodes": {
 3 |     "flake-compat": {
 4 |       "flake": false,
 5 |       "locked": {
 6 |         "lastModified": 1696267196,
 7 |         "narHash": "sha256-AAQ/2sD+0D18bb8hKuEEVpHUYD1GmO2Uh/taFamn6XQ=",
 8 |         "owner": "edolstra",
 9 |         "repo": "flake-compat",
10 |         "rev": "4f910c9827911b1ec2bf26b5a062cd09f8d89f85",
11 |         "type": "github"
12 |       },
13 |       "original": {
14 |         "owner": "edolstra",
15 |         "ref": "v1.0.0",
16 |         "repo": "flake-compat",
17 |         "type": "github"
18 |       }
19 |     },
20 |     "flake-utils": {
21 |       "locked": {
22 |         "lastModified": 1652776076,
23 |         "narHash": "sha256-gzTw/v1vj4dOVbpBSJX4J0DwUR6LIyXo7/SuuTJp1kM=",
24 |         "owner": "numtide",
25 |         "repo": "flake-utils",
26 |         "rev": "04c1b180862888302ddfb2e3ad9eaa63afc60cf8",
27 |         "type": "github"
28 |       },
29 |       "original": {
30 |         "owner": "numtide",
31 |         "ref": "v1.0.0",
32 |         "repo": "flake-utils",
33 |         "type": "github"
34 |       }
35 |     },
36 |     "nixpkgs": {
37 |       "locked": {
38 |         "lastModified": 1748026580,
39 |         "narHash": "sha256-rWtXrcIzU5wm/C8F9LWvUfBGu5U5E7cFzPYT1pHIJaQ=",
40 |         "owner": "NixOS",
41 |         "repo": "nixpkgs",
42 |         "rev": "11cb3517b3af6af300dd6c055aeda73c9bf52c48",
43 |         "type": "github"
44 |       },
45 |       "original": {
46 |         "owner": "NixOS",
47 |         "ref": "25.05",
48 |         "repo": "nixpkgs",
49 |         "type": "github"
50 |       }
51 |     },
52 |     "root": {
53 |       "inputs": {
54 |         "flake-compat": "flake-compat",
55 |         "flake-utils": "flake-utils",
56 |         "nixpkgs": "nixpkgs"
57 |       }
58 |     }
59 |   },
60 |   "root": "root",
61 |   "version": 7
62 | }
63 | 


--------------------------------------------------------------------------------
/ada.nix:
--------------------------------------------------------------------------------
 1 | { config, lib, options, pkgs, ... }:
 2 | 
 3 | { options.services.ada = {
 4 |     enable = lib.mkEnableOption "ada";
 5 | 
 6 |     chat-model = lib.mkOption {
 7 |       type = lib.types.str;
 8 |     };
 9 | 
10 |     embedding-model = lib.mkOption {
11 |       type = lib.types.str;
12 |     };
13 | 
14 |     openAIKeyFile = lib.mkOption {
15 |       type = lib.types.path;
16 |     };
17 | 
18 |     slackKeyFile = lib.mkOption {
19 |       type = lib.types.path;
20 |     };
21 | 
22 |     slackSigningSecretFile = lib.mkOption {
23 |       type = lib.types.path;
24 |     };
25 | 
26 |     getDXKeyFile = lib.mkOption {
27 |       type = lib.types.path;
28 |     };
29 | 
30 |     store = lib.mkOption {
31 |       type = lib.types.path;
32 |     };
33 | 
34 |     port = lib.mkOption {
35 |       type = lib.types.nullOr lib.types.port;
36 | 
37 |       default = null;
38 |     };
39 | 
40 |     debug = lib.mkOption {
41 |       type = lib.types.nullOr lib.types.bool;
42 | 
43 |       default = null;
44 |     };
45 |   };
46 | 
47 |   config = {
48 |     nixpkgs.overlays = [ (import ./overlay.nix) ];
49 | 
50 |     systemd.services.ada = lib.mkIf config.services.ada.enable {
51 |       wantedBy = [ "multi-user.target" ];
52 | 
53 |       path = [ pkgs.ada ];
54 | 
55 |       script =
56 |         let
57 |           adaOptions = {
58 |             inherit (config.services.ada) chat-model embedding-model store;
59 |           };
60 | 
61 |           queryOptions = {
62 |             inherit (config.services.ada) port debug;
63 |           };
64 |         in
65 |           ''
66 |             ada --openai-key "$(< ${lib.escapeShellArg config.services.ada.openAIKeyFile})" ${lib.cli.toGNUCommandLineShell { } adaOptions} query ${lib.cli.toGNUCommandLineShell { } queryOptions} --slack-api-key "$(< ${lib.escapeShellArg config.services.ada.slackKeyFile})" --slack-signing-secret "$(< ${lib.escapeShellArg config.services.ada.slackSigningSecretFile})" --getdx-api-key "$(< ${lib.escapeShellArg config.services.ada.getDXKeyFile})"
67 |           '';
68 |     };
69 |   };
70 | }
71 | 


--------------------------------------------------------------------------------
/OpenAI.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE DataKinds             #-}
 2 | {-# LANGUAGE DuplicateRecordFields #-}
 3 | {-# LANGUAGE DeriveAnyClass        #-}
 4 | {-# LANGUAGE DeriveGeneric         #-}
 5 | {-# LANGUAGE DerivingStrategies    #-}
 6 | {-# LANGUAGE TypeOperators         #-}
 7 | 
 8 | module OpenAI where
 9 | 
10 | import Data.Aeson (FromJSON(..), ToJSON, Options(..))
11 | import Data.Text (Text)
12 | import Data.Vector (Vector)
13 | import GHC.Generics (Generic)
14 | import Numeric.Natural (Natural)
15 | 
16 | import Servant.API
17 |     (Header', JSON, Post, ReqBody, Required, Strict, (:>), (:<|>))
18 | 
19 | import qualified Data.Aeson as Aeson
20 | 
21 | dropTrailingUnderscore :: String -> String
22 | dropTrailingUnderscore "_" = ""
23 | dropTrailingUnderscore ""  = ""
24 | dropTrailingUnderscore (c : cs) = c : dropTrailingUnderscore cs
25 | 
26 | aesonOptions :: Options
27 | aesonOptions = Aeson.defaultOptions
28 |     { fieldLabelModifier = dropTrailingUnderscore
29 |     }
30 | 
31 | data EmbeddingRequest = EmbeddingRequest
32 |     { input :: Vector Text
33 |     , model :: Text
34 |     } deriving stock (Generic, Show)
35 |       deriving anyclass (ToJSON)
36 | 
37 | data EmbeddingResponse = EmbeddingResponse
38 |     { data_ :: Vector Embedding
39 |     } deriving stock (Generic, Show)
40 | 
41 | instance FromJSON EmbeddingResponse where
42 |     parseJSON = Aeson.genericParseJSON aesonOptions
43 | 
44 | data Embedding = Embedding
45 |     { index :: Natural
46 |     , embedding :: Vector Double
47 |     } deriving stock (Generic, Show)
48 |       deriving anyclass (FromJSON)
49 | 
50 | type Embeddings =
51 |         "embeddings"
52 |     :>  ReqBody '[JSON] EmbeddingRequest
53 |     :>  Post '[JSON] EmbeddingResponse
54 | 
55 | data CompletionRequest = CompletionRequest
56 |     { messages :: Vector Message
57 |     , model :: Text
58 |     , max_tokens :: Maybe Natural
59 |     } deriving stock (Generic, Show)
60 |       deriving anyclass (ToJSON)
61 | 
62 | data Message = Message
63 |     { content :: Text
64 |     , role :: Text
65 |     } deriving stock (Generic, Show)
66 |       deriving anyclass (FromJSON, ToJSON)
67 | 
68 | data CompletionResponse = CompletionResponse
69 |     { choices :: Vector Choice
70 |     } deriving stock (Generic, Show)
71 |       deriving anyclass (FromJSON)
72 | 
73 | data Choice = Choice
74 |     { message :: Message
75 |     } deriving stock (Generic, Show)
76 |       deriving anyclass (FromJSON)
77 | 
78 | type Completions =
79 |         "chat"
80 |     :>  "completions"
81 |     :>  ReqBody '[JSON] CompletionRequest
82 |     :>  Post '[JSON] CompletionResponse
83 | 
84 | type API =
85 |         Header' [Required, Strict]  "Authorization" Text
86 |     :>  "v1"
87 |     :>  (     Embeddings
88 |         :<|>  Completions
89 |         )
90 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Ask Ada
  2 | 
  3 | Ada is an AI chatbot that we created at
  4 | [Mercury](https://mercury.com/blog/category/engineering) to help more quickly
  5 | onboard new developers and answer questions specific to our company and our
  6 | stack.  Her primary user interface is Slack and she responds to direct messages
  7 | or any message mentioning her.
  8 | 
  9 | Ada actually started out as a Hackweek project to create a cute furry AI
 10 | companion with a personality for Mercury engineers but got popular enough that
 11 | we ended up actually developing her more in earnest.
 12 | 
 13 | This repository is not really intended for use by other companies but you're
 14 | welcome to try to use, improve, or fork this code to your liking.  We've open
 15 | sourced this mostly as a proof-of-concept repository for how to build an AI
 16 | chatbot using
 17 | [retrieval-augmented generation](https://en.wikipedia.org/wiki/Retrieval-augmented_generation).
 18 | 
 19 | ## Usage
 20 | 
 21 | Ada supports three main subcommands:
 22 | 
 23 | ```
 24 | Usage: ada --openai-key KEY --store FILE --chat-model MODEL
 25 |            --embedding-model MODEL COMMAND
 26 | 
 27 |   A helpful AI assistant for Mercury engineers
 28 | 
 29 | Available options:
 30 |   -h,--help                Show this help text
 31 |   --openai-key KEY         OpenAI API key
 32 |   --store FILE             The path to the index
 33 |   --chat-model MODEL       The model to use for answering questions (e.g.
 34 |                            gpt-4o)
 35 |   --embedding-model MODEL  The model to use for creating and querying the index
 36 |                            (e.g. text-embedding-3-large)
 37 | 
 38 | Available commands:
 39 |   index                    Generate the index for the AI assistant
 40 |   query                    Ask the AI assistant questions via Slack
 41 |   repl                     Ask the AI assistant questions via a REPL
 42 | ```
 43 | 
 44 | - `index`: index a set of files to add them to her knowledge base
 45 | - `query`: answer Slack queries
 46 | - `repl`: answer local queries
 47 | 
 48 | The most important option is the `--store` option, which provides the path to
 49 | her index.  This is where she stores all information you feed to her.  The
 50 | `index` command uses the `--store` option as an output to specify where to
 51 | append the index and the other commands use the `--store` option as an input to
 52 | specify where to read the index from.
 53 | 
 54 | ## Ada's implementation
 55 | 
 56 | Ada is basically a very low-tech implementation of retrieval-augmented
 57 | generation.  This means that whenever you ask her a question her answer is
 58 | computed in two steps:
 59 | 
 60 | - first, an embedding model finds the closest documents in her index related to
 61 |   the question
 62 | 
 63 | - second, those documents are added to her prompt and a completion model
 64 |   generates the final answer
 65 | 
 66 | This means that her prompt looks like this:
 67 | 
 68 | > You are Ada, a helpful AI assistant whose persona is a foxgirl modeled after Senko from "The Helpful Fox Senko-san" (世話やきキツネの仙狐さん, Sewayaki Kitsune no Senko-san) and your avatar is a picture of Senko.  Your job is to respond to messages from Slack (such as the one at the end of this prompt) from engineers at Mercury (a startup that advertises itself as "Banking for ambitious companies") and your responses will be forwarded back to Slack as a reply to the original message (in a thread).
 69 | >
 70 | > The tone I'd like you to adopt is a bit lighthearted, casual, enthusiastic, and informal.
 71 | >
 72 | > …
 73 | >
 74 | > Possibly relevant documents:
 75 | >
 76 | > #{document₀}
 77 | >
 78 | > ---
 79 | >
 80 | > #{document₁}
 81 | >
 82 | > ---
 83 | >
 84 | > …
 85 | >
 86 | > ---
 87 | >
 88 | > #{documentₙ}
 89 | >
 90 | > …
 91 | >
 92 | > Finally, here is the actual message that you're replying to:
 93 | >
 94 | > #{query}
 95 | 
 96 | This is actually the simplest part of her implementation.  Most of her
 97 | complexity is not related to AI and is actually just about providing a nice
 98 | Slack user experience (which is surprisingly challenging!).
 99 | 
100 | ## Ada in action
101 | 
102 | We originally used Ada to index our codebase so that she could answer
103 | code-related questions like this one:
104 | 
105 | ![](./images/question0.png)
106 | 
107 | … or this one:
108 | 
109 | ![](./images/question1.png)
110 | 
111 | We found that Ada filled a useful gap in between ChatGPT (which doesn't have
112 | specific knowledge about our codebase) and GitHub Copilot (which emphasizes
113 | coding over understanding).
114 | 
115 | As people used her more we learned that she was really effective at answering
116 | questions related to our stack and architecture, too, like this question:
117 | 
118 | ![](./images/question2.png)
119 | 
120 | … or this one:
121 | 
122 | ![](./images/question3.png)
123 | 
124 | A big part of the reason why is because a lot of our architectural documentation
125 | lived side-by-side with the code as markdown documents under version control,
126 | and so when she indexed our code repositories she was indexing our
127 | documentation, too.
128 | 
129 | Once we realized this, we were highly incentivized to also build a
130 | [Notion to Markdown exporter](https://github.com/marketplace/actions/notion-to-markdown-exporter)
131 | so that she could easily index our Notion documentation, too.
132 | 
133 | One of the neat things about Ada is that she isn't "sterile" and adds a bit of
134 | color and personality to her answers:
135 | 
136 | ![](./images/question4.png)
137 | 
138 | Plus she is adorable and her infectious enthusiasm is one of the major appeals
139 | of interacting with her.
140 | 


--------------------------------------------------------------------------------
/Slack.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE BlockArguments        #-}
  2 | {-# LANGUAGE DataKinds             #-}
  3 | {-# LANGUAGE DeriveAnyClass        #-}
  4 | {-# LANGUAGE DeriveGeneric         #-}
  5 | {-# LANGUAGE DerivingStrategies    #-}
  6 | {-# LANGUAGE DuplicateRecordFields #-}
  7 | {-# LANGUAGE NamedFieldPuns        #-}
  8 | {-# LANGUAGE OverloadedLists       #-}
  9 | {-# LANGUAGE OverloadedStrings     #-}
 10 | {-# LANGUAGE RecordWildCards       #-}
 11 | {-# LANGUAGE TypeOperators         #-}
 12 | 
 13 | {-# OPTIONS -Wno-deprecations #-}
 14 | 
 15 | module Slack where
 16 | 
 17 | import Control.Monad (guard)
 18 | import Control.Monad.IO.Class (liftIO)
 19 | import Control.Monad.Trans.Maybe (MaybeT)
 20 | import Data.ByteString (ByteString)
 21 | import Data.Sequence (Seq)
 22 | import Data.String (IsString(..))
 23 | import Data.Text (Text)
 24 | import Data.Vector (Vector)
 25 | import GHC.Generics (Generic)
 26 | import Network.Wai (Application, Request)
 27 | import Numeric.Natural (Natural)
 28 | import Web.FormUrlEncoded (ToForm)
 29 | 
 30 | import Cheapskate
 31 |     (Block(..), Doc(..), Inline(..), ListType(..), NumWrapper(..), Options(..))
 32 | import Data.Aeson
 33 |     (FromJSON(..), Options(..), SumEncoding(..), ToJSON(..))
 34 | import Servant.API
 35 |     ( FormUrlEncoded
 36 |     , Header'
 37 |     , JSON
 38 |     , Optional
 39 |     , Post
 40 |     , QueryParam'
 41 |     , ReqBody
 42 |     , Required
 43 |     , Strict
 44 |     , (:>)
 45 |     , (:<|>)
 46 |     )
 47 | 
 48 | import qualified Cheapskate
 49 | import qualified Control.Monad.Trans.Maybe as MaybeT
 50 | import qualified Crypto.Hash.SHA256 as SHA256
 51 | import qualified Data.Aeson as Aeson
 52 | import qualified Data.Base16.Types as Base16.Types
 53 | import qualified Data.ByteString as ByteString
 54 | import qualified Data.ByteString.Base16 as Base16
 55 | import qualified Data.ByteString.Lazy as ByteString.Lazy
 56 | import qualified Data.Foldable as Foldable
 57 | import qualified Data.IORef as IORef
 58 | import qualified Data.Text as Text
 59 | import qualified Data.Text.Encoding as Text.Encoding
 60 | import qualified Data.Time as Time
 61 | import qualified Data.Time.Clock.POSIX as POSIX
 62 | import qualified Data.Vector as Vector
 63 | import qualified Network.HTTP.Types as HTTP.Types
 64 | import qualified Network.Wai as Wai
 65 | 
 66 | jsonOptions :: Aeson.Options
 67 | jsonOptions = Aeson.defaultOptions
 68 |     { constructorTagModifier = Aeson.camelTo2 '_'
 69 |     , fieldLabelModifier = dropWhile (== '_')
 70 |     , sumEncoding =
 71 |         TaggedObject{ tagFieldName = "type", contentsFieldName = "" }
 72 |     , tagSingleConstructors = True
 73 |     }
 74 | 
 75 | data AppsConnectionsOpenResponse = AppsConnectionsOpenResponse
 76 |     { ok :: Bool
 77 |     , url :: Text
 78 |     } deriving stock (Generic, Show)
 79 |       deriving anyclass (FromJSON)
 80 | 
 81 | type AppsConnectionsOpen =
 82 |         "apps.connections.open"
 83 |     :>  Post '[JSON] AppsConnectionsOpenResponse
 84 | 
 85 | data ChatPostMessageRequest = ChatPostMessageRequest
 86 |     { channel :: Text
 87 |     , thread_ts :: Maybe Text
 88 |     , blocks :: Maybe (Vector Slack.Block)
 89 |     , text :: Maybe Text
 90 |     } deriving stock (Generic, Show)
 91 |       deriving anyclass (ToJSON)
 92 | 
 93 | data ChatPostMessageResponse = ChatPostMessageResponse
 94 |     { ok :: Bool
 95 |     , error :: Maybe Text
 96 |     } deriving stock (Generic, Show)
 97 |       deriving anyclass (FromJSON)
 98 | 
 99 | type ChatPostMessage =
100 |         "chat.postMessage"
101 |     :>  ReqBody '[JSON] ChatPostMessageRequest
102 |     :>  Post '[JSON] ChatPostMessageResponse
103 | 
104 | data ConversationsRepliesRequest = ConversationsRepliesRequest
105 |     { channel :: Text
106 |     , ts :: Text
107 |     } deriving stock (Generic, Show)
108 |       deriving anyclass (ToJSON)
109 | 
110 | data Message = Message
111 |     { user :: Text
112 |     , text :: Text
113 |     , thread_ts :: Maybe Text
114 |     } deriving stock (Generic, Show)
115 |       deriving anyclass (FromJSON)
116 | 
117 | data ConversationsRepliesResponse = ConversationsRepliesResponse
118 |     { ok :: Bool
119 |     , error :: Maybe Text
120 |     , messages :: Maybe (Vector Message)
121 |     } deriving stock (Generic, Show)
122 |       deriving anyclass (FromJSON)
123 | 
124 | type ConversationsReplies =
125 |         "conversations.replies"
126 |     :>  QueryParam' [Required, Strict] "channel" Text
127 |     :>  QueryParam' [Required, Strict] "ts" Text
128 |     :>  QueryParam' [Optional, Strict] "limit" Natural
129 |     :>  Post '[JSON] ConversationsRepliesResponse
130 | 
131 | data UsersInfoRequest = UsersInfoRequest
132 |     { user :: Text
133 |     } deriving stock (Generic, Show)
134 |       deriving anyclass (ToForm)
135 | 
136 | data Profile = Profile
137 |     { display_name :: Text
138 |     , email :: Text
139 |     } deriving stock (Generic, Show)
140 |       deriving anyclass (FromJSON)
141 | 
142 | data User = User
143 |     { profile :: Profile
144 |     } deriving stock (Generic, Show)
145 |       deriving anyclass (FromJSON)
146 | 
147 | data UsersInfoResponse = UsersInfoResponse
148 |     { ok :: Bool
149 |     , error :: Maybe Text
150 |     , user :: Maybe User
151 |     } deriving stock (Generic, Show)
152 |       deriving anyclass (FromJSON)
153 | 
154 | type UsersInfo =
155 |         "users.info"
156 |     :>  ReqBody '[FormUrlEncoded] UsersInfoRequest
157 |     :>  Post '[JSON] UsersInfoResponse
158 | 
159 | type Client =
160 |         Header' [Required, Strict] "Authorization" Text
161 |     :>  "api"
162 |     :>  (     AppsConnectionsOpen
163 |         :<|>  ChatPostMessage
164 |         :<|>  ConversationsReplies
165 |         :<|>  UsersInfo
166 |         )
167 | 
168 | data SocketEvent
169 |     = Hello{ }
170 |     | EventsAPI
171 |         { envelope_id :: Text
172 |         , payload :: Payload
173 |         }
174 |     | Disconnect
175 |     deriving stock (Generic, Show)
176 | 
177 | instance FromJSON SocketEvent where
178 |     parseJSON = Aeson.genericParseJSON jsonOptions
179 | 
180 | data Payload = Payload
181 |     { event :: Event
182 |     } deriving stock (Generic, Show)
183 |       deriving anyclass (FromJSON)
184 | 
185 | data Event = Event
186 |     { ts :: Text
187 |     , channel :: Text
188 |     , text :: Text
189 |     , user :: Text
190 |     } deriving stock (Generic, Show)
191 |       deriving anyclass (FromJSON)
192 | 
193 | data Acknowledgment = Acknowledgment
194 |     { envelope_id :: Text
195 |     } deriving stock (Generic, Show)
196 |       deriving anyclass (ToJSON)
197 | 
198 | data ServerRequest
199 |     = URLVerification{ token :: Text, challenge :: Text }
200 |     | EventCallback{ event :: Event }
201 |     deriving stock (Generic, Show)
202 | 
203 | instance FromJSON ServerRequest where
204 |     parseJSON = Aeson.genericParseJSON jsonOptions
205 | 
206 | data ServerResponse
207 |     = ChallengeResponse{ challenge :: Text }
208 |     | EmptyResponse
209 |     deriving stock (Generic, Show)
210 | 
211 | instance ToJSON ServerResponse where
212 |     toJSON EmptyResponse = Aeson.Object []
213 |     toJSON ChallengeResponse{ challenge } =
214 |         Aeson.object [ ("challenge", toJSON challenge) ]
215 | 
216 | type Server =
217 |         ReqBody '[JSON] ServerRequest
218 |     :>  Post '[JSON] ServerResponse
219 | 
220 | -- https://api.slack.com/authentication/verifying-requests-from-slack
221 | verify :: Text -> Request -> MaybeT IO ByteString
222 | verify signingSecret request = do
223 |     body <- liftIO (Wai.strictRequestBody request)
224 | 
225 |     Just timestampBytes <- return (lookup "X-Slack-Request-Timestamp" (Wai.requestHeaders request))
226 | 
227 |     Right timestampText <- return (Text.Encoding.decodeUtf8' timestampBytes)
228 | 
229 |     timestamp <- Time.parseTimeM True Time.defaultTimeLocale "%s" (Text.unpack timestampText)
230 | 
231 |     now <- liftIO (POSIX.getPOSIXTime)
232 | 
233 |     guard (abs (now - timestamp) <= 60 * 5)
234 | 
235 |     let baseBytes =
236 |             ByteString.concat
237 |                 [ "v0:"
238 |                 , timestampBytes
239 |                 , ":"
240 |                 , ByteString.Lazy.toStrict body
241 |                 ]
242 | 
243 |     let signingSecretBytes = Text.Encoding.encodeUtf8 signingSecret
244 | 
245 |     let hash = SHA256.hmac signingSecretBytes baseBytes
246 | 
247 |     let base16 = Base16.Types.extractBase16 (Base16.encodeBase16' hash)
248 | 
249 |     let signature = "v0=" <> base16
250 | 
251 |     Just xSlackSignature <- return (lookup "x-slack-signature" (Wai.requestHeaders request))
252 | 
253 |     guard (signature == xSlackSignature)
254 | 
255 |     return (ByteString.Lazy.toStrict body)
256 | 
257 | verificationMiddleware :: Text -> Application -> Application
258 | verificationMiddleware signingSecret application request respond = do
259 |     verified <- MaybeT.runMaybeT (verify signingSecret request)
260 | 
261 |     case verified of
262 |         Just originalRequestBody -> do
263 |             ref <- IORef.newIORef (Just originalRequestBody)
264 | 
265 |             -- This is a hack to work around the fact that if a signing
266 |             -- middleware consumes the request body then it's not available
267 |             -- for the actual handler.  See:
268 |             --
269 |             -- https://github.com/haskell-servant/servant/issues/1120#issuecomment-1084318908
270 |             let fakeRequestBody = do
271 |                     m <- IORef.readIORef ref
272 |                     case m of
273 |                         Just bytes -> do
274 |                             IORef.writeIORef ref Nothing
275 | 
276 |                             return bytes
277 | 
278 |                         Nothing -> do
279 |                             return mempty
280 | 
281 |             let request' = request{ Wai.requestBody = fakeRequestBody }
282 | 
283 |             application request' respond
284 | 
285 |         Nothing -> do
286 |             let response = Wai.responseBuilder HTTP.Types.status400 mempty mempty
287 |             respond response
288 | 
289 | -- We're only codifying the parts of Slack's Blocks API that we actually use
290 | 
291 | data PlainText = PlainText{ text :: Text }
292 |     deriving stock (Generic, Show)
293 | 
294 | instance ToJSON PlainText where
295 |     toJSON = Aeson.genericToJSON jsonOptions
296 |     toEncoding = Aeson.genericToEncoding jsonOptions
297 | 
298 | data Style = Style
299 |     { bold :: Bool
300 |     , italic :: Bool
301 |     , strike :: Bool
302 |     , code :: Bool
303 |     } deriving stock (Generic, Show)
304 |       deriving anyclass (ToJSON)
305 | 
306 | defaultStyle :: Style
307 | defaultStyle =
308 |     Style{ bold = False, italic = False, strike = False, code = False }
309 | 
310 | data ListStyle = Bullet | Ordered
311 |     deriving stock (Generic, Show)
312 | 
313 | instance ToJSON ListStyle where
314 |     toJSON = Aeson.genericToJSON jsonOptions
315 |     toEncoding = Aeson.genericToEncoding jsonOptions
316 | 
317 | data RichTextElement
318 |     = Text{ text :: Text, style :: Style }
319 |     | Link{ text :: Text, style :: Style, url :: Text, unsafe :: Bool }
320 |     deriving stock (Generic, Show)
321 | 
322 | instance ToJSON RichTextElement where
323 |     toJSON = Aeson.genericToJSON jsonOptions
324 |     toEncoding = Aeson.genericToEncoding jsonOptions
325 | 
326 | instance IsString RichTextElement where
327 |     fromString string = fromText (fromString string)
328 | 
329 | fromText :: Text -> RichTextElement
330 | fromText string = Text{ text = string, style = defaultStyle }
331 | 
332 | data RichTextObject
333 |     = RichTextSection{ elements :: Vector RichTextElement }
334 |     | RichTextList{ _elements :: Vector RichTextObject, _style :: ListStyle }
335 |     -- ^ Technically the `RichTextObject`s here can only be `RichTextSection`s.
336 |     | RichTextPreformatted{ elements :: Vector RichTextElement }
337 |     | RichTextQuote{ elements :: Vector RichTextElement }
338 |     deriving (Generic, Show)
339 | 
340 | instance ToJSON RichTextObject where
341 |     toJSON = Aeson.genericToJSON jsonOptions
342 |     toEncoding = Aeson.genericToEncoding jsonOptions
343 | 
344 | data Block
345 |     = Divider
346 |     | Header{ text :: PlainText }
347 |     | RichText{ elements :: Vector RichTextObject }
348 |     deriving stock (Generic, Show)
349 | 
350 | instance ToJSON Slack.Block where
351 |     toJSON = Aeson.genericToJSON jsonOptions
352 |     toEncoding = Aeson.genericToEncoding jsonOptions
353 | 
354 | enableItalic :: RichTextElement -> RichTextElement
355 | enableItalic richTextElement =
356 |     richTextElement{ style = (style richTextElement){ italic = True } }
357 | 
358 | enableBold :: RichTextElement -> RichTextElement
359 | enableBold richTextElement =
360 |     richTextElement{ style = (style richTextElement){ bold = True } }
361 | 
362 | enableStrike :: RichTextElement -> RichTextElement
363 | enableStrike richTextElement =
364 |     richTextElement{ style = (style richTextElement){ strike = True } }
365 | 
366 | enableCode :: RichTextElement -> RichTextElement
367 | enableCode richTextElement =
368 |     richTextElement{ style = (style richTextElement){ code = True } }
369 | 
370 | linkTo :: Text -> RichTextElement -> RichTextElement
371 | linkTo url Text{..} = Slack.Link{ unsafe = False, .. }
372 | linkTo url Slack.Link{ url = _, ..} = Slack.Link{..}
373 | 
374 | htmlToPara :: Text -> Cheapskate.Block
375 | htmlToPara html = Para (pure (RawHtml html))
376 | 
377 | -- | This is only used for headers, which don't accept any markdown features,
378 | -- so this strips almost all formatting/links and convert newlines to spaces,
379 | -- with the exception of superscript/subscripts, which we can translate to
380 | -- their Unicode equivalents when available.
381 | inlinesToPlainText :: Seq Inline -> Text
382 | inlinesToPlainText = foldMap inlineToPlainText
383 |   where
384 |     inlineToPlainText :: Inline -> Text
385 |     inlineToPlainText (Str string) = string
386 |     inlineToPlainText Space = " "
387 |     inlineToPlainText SoftBreak = " "
388 |     inlineToPlainText LineBreak = " "
389 |     inlineToPlainText (Emph inlines) =
390 |         inlinesToPlainText inlines
391 |     inlineToPlainText (Strong inlines) =
392 |         inlinesToPlainText inlines
393 |     inlineToPlainText (Code string) =
394 |         string
395 |     inlineToPlainText (Cheapskate.Link inlines _uri _maybeTitle) =
396 |         inlinesToPlainText inlines
397 |     inlineToPlainText (Image inlines _uri _maybeTitle) =
398 |         inlinesToPlainText inlines
399 |     inlineToPlainText (Entity string) = string
400 |     inlineToPlainText (RawHtml string) = string
401 | 
402 | -- An `Inline` is basically `mmark`'s version of a span and a `RichTextElement`
403 | -- is basically Slack's version of a span, so this function essentially
404 | -- converts from Markdown spans to Slack spans.
405 | --
406 | -- The main thing to keep in mind here is that `mmark`'s spans can be nested
407 | -- because it can parse things like:
408 | --
409 | -- > **foo `bar`**
410 | --
411 | -- … which translates to something like `Strong [ "foo ", CodeSpan "bar" ]`.
412 | --
413 | -- However, Slack spans are not nested, so when we convert to Slack we have to
414 | -- turn `mmark`'s nested formatting specifiers and flatten them to something
415 | -- like:
416 | --
417 | -- > [ Text{ text = "foo ", style = defaultStyle{ bold = True } }
418 | -- > , Text{ text = "bar" , style = defaultStyle{ bold = True, code = True } }
419 | -- > ]
420 | --
421 | inlinesToRichTextElements :: Seq Inline -> [RichTextElement]
422 | inlinesToRichTextElements = foldMap inlineToRichTextElements
423 |   where
424 |     inlineToRichTextElements :: Inline -> [RichTextElement]
425 |     inlineToRichTextElements (Str string) =
426 |         [ fromText string ]
427 |     inlineToRichTextElements Space =
428 |         [ " " ]
429 |     inlineToRichTextElements SoftBreak =
430 |         [ " " ]
431 |     inlineToRichTextElements LineBreak =
432 |         [ "\n" ]
433 |     inlineToRichTextElements (Emph inlines) =
434 |         fmap enableItalic (inlinesToRichTextElements inlines)
435 |     inlineToRichTextElements (Strong inlines) =
436 |         fmap enableBold (inlinesToRichTextElements inlines)
437 |     inlineToRichTextElements (Code string) =
438 |         [ Text{ text = string, style = defaultStyle{ code = True } } ]
439 |     inlineToRichTextElements (Cheapskate.Link inlines uri _maybeTitleText) =
440 |         fmap (linkTo uri) (inlinesToRichTextElements inlines)
441 |     inlineToRichTextElements (Image inlines uri _maybeTitleText) =
442 |         fmap (linkTo uri) (inlinesToRichTextElements inlines)
443 |     inlineToRichTextElements (Entity string) =
444 |         [ fromText string ]
445 |     inlineToRichTextElements (RawHtml string) =
446 |         [ fromText string ]
447 | 
448 | blocksToRichTextElements :: Seq Cheapskate.Block -> [RichTextElement]
449 | blocksToRichTextElements = foldMap blockToRichTextElements
450 | 
451 | -- | Unlike markdown, Slack's Blocks API *does not* permit nesting features
452 | --   arbitrarily.  This means that once we start going one level deep (like
453 | --   inside of a list or inside quotes), we need to start simulating markdown
454 | --   features as their textual markdown representation.  For example, we
455 | --   convert headers to "#"s and convert lists to their textual representation
456 | --   instead of using Slack's native support for ordered/unordered lists.
457 | blockToRichTextElements :: Cheapskate.Block -> [RichTextElement]
458 | blockToRichTextElements (Para inlines) =
459 |     inlinesToRichTextElements inlines
460 | blockToRichTextElements (Cheapskate.Header headerLevel inlines) =
461 |     fmap enableBold (hashes : " " : inlinesToRichTextElements inlines) <>  [ "\n" ]
462 |   where
463 |     hashes = fromString (replicate headerLevel '#')
464 | blockToRichTextElements (Blockquote blocks) =
465 |     blocksToRichTextElements blocks
466 | blockToRichTextElements (List _ (Numbered wrapper startingIndex) items) = "\n" : do
467 |     (index, blocks) <- zip [ startingIndex .. ] (Foldable.toList items)
468 |     let separator = case wrapper of
469 |             PeriodFollowing -> "."
470 |             ParenFollowing -> ")"
471 |     fromString (show index <> separator <> " ") : blocksToRichTextElements blocks <> [ "\n" ]
472 | blockToRichTextElements (List _ (Cheapskate.Bullet bullet) items) = "\n" : do
473 |     blocks <- Foldable.toList items
474 |     fromString (bullet : " ") : blocksToRichTextElements blocks <> [ "\n" ]
475 | blockToRichTextElements (CodeBlock _maybeInfo string) =
476 |     [ Text{ text = "\n" <> string, style = defaultStyle{ code = True } }
477 |     ]
478 | blockToRichTextElements (HtmlBlock html) =
479 |     blockToRichTextElements (htmlToPara html)
480 | blockToRichTextElements HRule =
481 |     [ "* * *\n" ]
482 | 
483 | itemToRichTextSection :: Seq Cheapskate.Block -> RichTextObject
484 | itemToRichTextSection blocks = RichTextSection
485 |     { elements = Vector.fromList (blocksToRichTextElements blocks) }
486 | 
487 | -- Yeah, they're both called blocks.  Go figure.
488 | blockToBlock :: Cheapskate.Block -> Slack.Block
489 | blockToBlock (Para inlines) = RichText
490 |     { elements =
491 |         [ RichTextSection
492 |             { elements =
493 |                 Vector.fromList (inlinesToRichTextElements inlines)
494 |             }
495 |         ]
496 |     }
497 | -- Unfortunately, Slack's Blocks API doesn't support multiple levels
498 | -- of headers, so we have to translate all header levels in the exact
499 | -- same way.
500 | blockToBlock (Cheapskate.Header _ inlines) =
501 |     Slack.Header { text = PlainText{ text = inlinesToPlainText inlines } }
502 | blockToBlock (Blockquote blocks) = RichText
503 |     { elements =
504 |         [ RichTextQuote
505 |             { elements = Vector.fromList (blocksToRichTextElements blocks) }
506 |         ]
507 |     }
508 | blockToBlock (List _ (Numbered _ _) items) = RichText
509 |     { elements =
510 |         [ RichTextList
511 |             { _elements =
512 |                 Vector.fromList (fmap itemToRichTextSection (Foldable.toList items))
513 |             , _style = Ordered
514 |             }
515 |         ]
516 |     }
517 | blockToBlock (List _ (Cheapskate.Bullet _) items) = RichText
518 |     { elements =
519 |         [ RichTextList
520 |             { _elements =
521 |                 Vector.fromList (fmap itemToRichTextSection (Foldable.toList items))
522 |             , _style = Slack.Bullet
523 |             }
524 |         ]
525 |     }
526 | blockToBlock (CodeBlock _maybeInfo string) = RichText
527 |     { elements = [ RichTextPreformatted { elements = [ fromText string ] } ] }
528 | blockToBlock (HtmlBlock html) = blockToBlock (htmlToPara html)
529 | blockToBlock HRule = Divider
530 | 
531 | docToBlocks :: Doc -> Vector Slack.Block
532 | docToBlocks (Doc _ blocks) =
533 |     Vector.fromList (map blockToBlock (Foldable.toList blocks))
534 | 
535 | markdownToBlocks :: Text -> Vector Slack.Block
536 | markdownToBlocks text = docToBlocks (Cheapskate.markdown options text)
537 |   where
538 |     options = Options
539 |         { sanitize = True
540 |         , allowRawHtml = False
541 |         , preserveHardBreaks = False
542 |         , debug = False
543 |         }
544 | 


--------------------------------------------------------------------------------
/Main.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE ApplicativeDo       #-}
  2 | {-# LANGUAGE BlockArguments      #-}
  3 | {-# LANGUAGE DeriveAnyClass      #-}
  4 | {-# LANGUAGE DeriveGeneric       #-}
  5 | {-# LANGUAGE DerivingStrategies  #-}
  6 | {-# LANGUAGE NamedFieldPuns      #-}
  7 | {-# LANGUAGE NumericUnderscores  #-}
  8 | {-# LANGUAGE OverloadedLists     #-}
  9 | {-# LANGUAGE OverloadedStrings   #-}
 10 | {-# LANGUAGE QuasiQuotes         #-}
 11 | {-# LANGUAGE RecordWildCards     #-}
 12 | {-# LANGUAGE ScopedTypeVariables #-}
 13 | {-# LANGUAGE TypeApplications    #-}
 14 | {-# LANGUAGE ViewPatterns        #-}
 15 | 
 16 | {-# OPTIONS_GHC -Wall -fno-warn-orphans #-}
 17 | 
 18 | module Main where
 19 | 
 20 | import Codec.Serialise (Serialise)
 21 | import Control.Applicative (many, optional, (<|>))
 22 | import Control.Exception.Safe (Exception, SomeException)
 23 | import Control.Monad (forever)
 24 | import Control.Monad (unless)
 25 | import Control.Monad.IO.Class (liftIO)
 26 | import Data.ByteString.Lazy (ByteString)
 27 | import Data.Proxy (Proxy(..))
 28 | import Data.String.Interpolate (__i)
 29 | import Data.Text (Text)
 30 | import Data.Vector (Vector, (!?))
 31 | import GHC.Generics (Generic)
 32 | import GetDX (EventsTrackRequest(..), EventsTrackResponse(..))
 33 | import Network.HTTP.Types (Status(..))
 34 | import Network.Wai (Application)
 35 | import Network.Wai.Handler.Warp (Port)
 36 | import Network.WebSockets.Client (ConnectionException(..))
 37 | import Options.Applicative (Parser, ParserInfo, ParserPrefs(..))
 38 | import Prelude hiding (error)
 39 | import Servant.API ((:<|>)(..))
 40 | import Servant.Client (ClientEnv, ClientError(..), ClientM, ResponseF(..))
 41 | import Tiktoken (Encoding)
 42 | 
 43 | import OpenAI
 44 |     ( Choice(..)
 45 |     , CompletionRequest(..)
 46 |     , CompletionResponse(..)
 47 |     , Embedding(..)
 48 |     , EmbeddingRequest(..)
 49 |     , EmbeddingResponse(..)
 50 |     )
 51 | import Slack
 52 |     ( Acknowledgment(..)
 53 |     , AppsConnectionsOpenResponse(..)
 54 |     , ChatPostMessageRequest(..)
 55 |     , ChatPostMessageResponse(..)
 56 |     , ConversationsRepliesResponse(..)
 57 |     , Event(..)
 58 |     , Payload(..)
 59 |     , Profile(..)
 60 |     , ServerRequest(..)
 61 |     , ServerResponse(..)
 62 |     , SocketEvent(..)
 63 |     , User(..)
 64 |     , UsersInfoRequest(..)
 65 |     , UsersInfoResponse(..)
 66 |     )
 67 | import System.Console.Repline
 68 |     (CompleterStyle(..), ExitDecision(..), MultiLine(..), ReplOpts(..))
 69 | 
 70 | import qualified Codec.Serialise as Serialise
 71 | import qualified Control.Concurrent as Concurrent
 72 | import qualified Control.Exception.Safe as Exception
 73 | import qualified Control.Logging as Logging
 74 | import qualified Control.Monad as Monad
 75 | import qualified Control.Monad.Except as Except
 76 | import qualified Data.Aeson as Aeson
 77 | import qualified Data.ByteString.Lazy as ByteString.Lazy
 78 | import qualified Data.KdTree.Static as KdTree
 79 | import qualified Data.Maybe as Maybe
 80 | import qualified Data.Text as Text
 81 | import qualified Data.Text.Encoding as Text.Encoding
 82 | import qualified Data.Text.IO as Text.IO
 83 | import qualified Data.Time.Clock.POSIX as Time.POSIX
 84 | import qualified Data.Vector as Vector
 85 | import qualified Data.Vector.Split as Split
 86 | import qualified GetDX
 87 | import qualified Network.HTTP.Client as HTTP
 88 | import qualified Network.HTTP.Client.TLS as TLS
 89 | import qualified Network.HTTP.Types as HTTP.Types
 90 | import qualified Network.Wai as Wai
 91 | import qualified Network.Wai.Handler.Warp as Warp
 92 | import qualified Network.Wai.Middleware.RequestLogger as RequestLogger
 93 | import qualified Network.WebSockets.Client as WebSockets
 94 | import qualified OpenAI
 95 | import qualified Options.Applicative as Options
 96 | import qualified Servant.Client as Client
 97 | import qualified Servant.Server as Server
 98 | import qualified Slack
 99 | import qualified System.Console.Repline as Repline
100 | import qualified System.Directory as Directory
101 | import qualified Text.Show.Pretty as Pretty
102 | import qualified Tiktoken
103 | 
104 | instance Semigroup a => Semigroup (ClientM a) where
105 |     (<>) = liftA2 (<>)
106 | 
107 | instance Monoid a => Monoid (ClientM a) where
108 |     mempty = pure mempty
109 | 
110 | safeHead :: Vector a -> Maybe a
111 | safeHead v = v !? 0
112 | 
113 | data Mode
114 |     = Index{ sourcedFiles :: Vector SourcedFile }
115 |     | Slack{ slackAPIKey :: Text, api :: SlackAPI, getDXKey :: Maybe Text }
116 |     | REPL{ blocks :: Bool }
117 | 
118 | data SlackAPI
119 |     = EventAPI{ signingSecret :: Text, port :: Port, debug :: Bool }
120 |     | SocketAPI{ slackSocketKey :: Text }
121 | 
122 | data SourcedFile = SourcedFile{ source :: Maybe Text, file :: FilePath }
123 | 
124 | parsePath :: Parser SourcedFile
125 | parsePath = do
126 |     source <- optional
127 |         (Options.strOption (Options.long "source" <> Options.metavar "SOURCE"))
128 | 
129 |     file <- Options.strArgument
130 |         (Options.metavar "FILE" <> Options.action "file")
131 | 
132 |     return SourcedFile{..}
133 | 
134 | parseIndex :: Parser Mode
135 | parseIndex = do
136 |     sourcedFiles <- fmap Vector.fromList (many parsePath)
137 | 
138 |     return Index{..}
139 | 
140 | parseIndexInfo :: ParserInfo Mode
141 | parseIndexInfo =
142 |     Options.info
143 |         parseIndex
144 |         (Options.progDesc "Generate the index for the AI assistant")
145 | 
146 | parseEventAPI :: Parser SlackAPI
147 | parseEventAPI = do
148 |     port <- Options.option Options.auto
149 |         (   Options.long "port"
150 |         <>  Options.help "Server port to listen on"
151 |         <>  Options.metavar "PORT"
152 |         <>  Options.value 80
153 |         )
154 | 
155 |     signingSecret <- Options.strOption
156 |         (   Options.long "slack-signing-secret"
157 |         <>  Options.help "Slack signing secret"
158 |         <>  Options.metavar "KEY"
159 |         )
160 | 
161 |     debug <- Options.switch
162 |         (   Options.long "debug"
163 |         <>  Options.help "Enable debug logging for incoming HTTP requests"
164 |         )
165 | 
166 |     pure EventAPI{..}
167 | 
168 | parseSocketAPI :: Parser SlackAPI
169 | parseSocketAPI = do
170 |     slackSocketKey <- Options.strOption
171 |         (   Options.long "slack-socket-key"
172 |         <>  Options.help "Slack socket key"
173 |         <>  Options.metavar "KEY"
174 |         )
175 | 
176 |     pure SocketAPI{..}
177 | 
178 | parseREPL :: Parser Mode
179 | parseREPL = do
180 |     blocks <- Options.switch
181 |         (   Options.long "blocks"
182 |         <>  Options.help "Debug Slack Blocks API by display all intermediate data structures"
183 |         )
184 | 
185 |     pure REPL{..}
186 | 
187 | parseREPLInfo :: ParserInfo Mode
188 | parseREPLInfo =
189 |     Options.info
190 |         parseREPL
191 |         (Options.progDesc "Ask the AI assistant questions via a REPL")
192 | 
193 | parseSlack :: Parser Mode
194 | parseSlack = do
195 |     slackAPIKey <- Options.strOption
196 |         (   Options.long "slack-api-key"
197 |         <>  Options.help "Slack API key"
198 |         <>  Options.metavar "KEY"
199 |         )
200 | 
201 |     api <- parseSocketAPI <|> parseEventAPI
202 | 
203 |     getDXKey <- optional
204 |         (Options.strOption
205 |             (   Options.long "getdx-api-key"
206 |             <>  Options.help "GetDX API key"
207 |             <>  Options.metavar "KEY"
208 |             )
209 |         )
210 | 
211 |     pure Slack{..}
212 | 
213 | parseSlackInfo :: ParserInfo Mode
214 | parseSlackInfo =
215 |     Options.info
216 |         parseSlack
217 |         (Options.progDesc "Ask the AI assistant questions via Slack")
218 | 
219 | data Options = Options
220 |     { openAIAPIKey :: Text
221 |     , store :: FilePath
222 |     , chatModel :: Text
223 |     , embeddingModel :: Text
224 |     , mode :: Mode
225 |     }
226 | 
227 | parseOptions :: Parser Options
228 | parseOptions = do
229 |     openAIAPIKey <- Options.strOption
230 |         (   Options.long "openai-key"
231 |         <>  Options.help "OpenAI API key"
232 |         <>  Options.metavar "KEY"
233 |         )
234 | 
235 |     store <- Options.strOption
236 |         (   Options.long "store"
237 |         <>  Options.help "The path to the index"
238 |         <>  Options.metavar "FILE"
239 |         <>  Options.action "file"
240 |         )
241 | 
242 |     chatModel <- Options.strOption
243 |         (   Options.long "chat-model"
244 |         <>  Options.help "The model to use for answering questions (e.g. gpt-4o)"
245 |         <>  Options.metavar "MODEL"
246 |         )
247 | 
248 |     embeddingModel <- Options.strOption
249 |         (   Options.long "embedding-model"
250 |         <>  Options.help "The model to use for creating and querying the index (e.g. text-embedding-3-large)"
251 |         <>  Options.metavar "MODEL"
252 |         )
253 | 
254 |     mode <- Options.hsubparser
255 |         (   Options.command "index" parseIndexInfo
256 |         <>  Options.command "query" parseSlackInfo
257 |         <>  Options.command "repl" parseREPLInfo
258 |         )
259 | 
260 |     return Options{..}
261 | 
262 | parseOptionsInfo :: ParserInfo Options
263 | parseOptionsInfo =
264 |     Options.info
265 |         (Options.helper <*> parseOptions)
266 |         (Options.progDesc "A helpful AI assistant for Mercury engineers")
267 | 
268 | parserPrefs :: ParserPrefs
269 | parserPrefs = Options.defaultPrefs
270 |     { prefMultiSuffix = "..."
271 |     , prefShowHelpOnError = True
272 |     , prefHelpShowGlobal = True
273 |     }
274 | 
275 | data IndexedContent = IndexedContent
276 |     { content :: Text
277 |     , embedding :: Vector Double
278 |     } deriving stock (Generic)
279 |       deriving anyclass (Serialise)
280 | 
281 | separated :: Vector Text -> Text
282 | separated entries =
283 |     Text.intercalate "\n\n---\n\n" (Vector.toList entries)
284 | 
285 | validateEmbeddingResponse :: Vector a -> Vector b -> IO ()
286 | validateEmbeddingResponse data_ input = do
287 |     unless (Vector.length data_ == Vector.length input) do
288 |         fail [__i|
289 |             Internal error: the OpenAI API returned the wrong number of embeddings
290 | 
291 |             The OpenAI API should return exactly as many embeddings as inputs that we
292 |             provided, but returned a different number of embeddings:
293 | 
294 |             \# of inputs provided    : #{Vector.length input}
295 |             \# of embeddings returned: #{Vector.length data_}
296 |         |]
297 | 
298 | runClient :: ClientEnv -> ClientM a -> IO a
299 | runClient env client = retry503 (throws (Client.runClientM client env))
300 |   where
301 |     retry503 io = Exception.handle handler io
302 |       where
303 |         handler
304 |             (FailureResponse
305 |                 _
306 |                 Response{ responseStatusCode = Status{ statusCode = 503 } }
307 |             ) =
308 |             retry503 io
309 |         handler e =
310 |             Exception.throwIO e
311 | 
312 | throws :: Exception e => IO (Either e a) -> IO a
313 | throws io = do
314 |     result <- io
315 | 
316 |     case result of
317 |         Left  clientError -> Exception.throwIO clientError
318 |         Right x           -> return x
319 | 
320 | retrying :: IO a -> IO a
321 | retrying io = Exception.handle handler io
322 |   where
323 |     handler ConnectionClosed = retrying io
324 |     handler CloseRequest{}   = retrying io
325 |     handler exception        = Exception.throwIO exception
326 | 
327 | chunksOf :: Int -> [a] -> [[a]]
328 | chunksOf n xs
329 |     | null xs = []
330 |     | otherwise = prefix : chunksOf n suffix
331 |   where
332 |     (prefix, suffix) = splitAt n xs
333 | 
334 | chunksOfTokens :: Encoding -> Int -> Text -> Maybe [Text]
335 | chunksOfTokens encoding n text = do
336 |     let bytes = Text.Encoding.encodeUtf8 text
337 | 
338 |     tokens <- Tiktoken.toTokens encoding bytes
339 | 
340 |     let byteChunks = chunksOf n tokens
341 | 
342 |     -- The result of tokenization should be valid UTF-8, but even if
343 |     -- it's not it's fine if we fall back with lenient decoding.
344 |     let tokensToText =
345 |             Text.Encoding.decodeUtf8Lenient . Tiktoken.fromTokens
346 | 
347 |     return (map tokensToText byteChunks)
348 | 
349 | toInputs :: SourcedFile -> IO [Text]
350 | toInputs SourcedFile{..} = do
351 |     text <- Text.IO.readFile file
352 | 
353 |     -- This is currently the same across all embedding
354 |     -- models, although that might change over time.
355 |     -- We hardcode it for now
356 |     let maximumTokens = 8191
357 | 
358 |     -- This is also currently always the same across
359 |     -- all embedding models
360 |     let encoding = Tiktoken.cl100k_base
361 | 
362 |     let prefix = [__i|
363 |             Source: #{Maybe.fromMaybe (Text.pack file) source}
364 |             Contents:
365 |         |] <> "\n\n"
366 | 
367 |     let prefixBytes = Text.Encoding.encodeUtf8 prefix
368 | 
369 |     prefixTokens <- case Tiktoken.toTokens encoding prefixBytes of
370 |         Just tokens -> do
371 |             return (length tokens)
372 |         Nothing -> do
373 |             Exception.throwIO TokenizationFailure{ text = prefix }
374 | 
375 |     -- This can *technically* undercount by one token if the input
376 |     -- begins with a newline, but the important thing is that this is
377 |     -- not an overcount.
378 |     --
379 |     -- The reason why is that `tiktoken` splits the input on whitespace
380 |     -- before performing byte-pair encoding.  This means that if you
381 |     -- split an input on a whitespace boundary to produce two halves, A
382 |     -- and B, then:
383 |     --
384 |     --     count(A <> B) ≤ count(A) + count(B) ≤ 1 + count(A <> B)
385 |     --
386 |     -- … or equivalently:
387 |     --
388 |     --     count(A <> B) - count(B) ≤ count(A) ≤ 1 + count(A <> B) - count(B)
389 |     --
390 |     -- … where `count` is the count of the number of tokens.  Using
391 |     -- variable names from this code:
392 |     --
393 |     --     maximumTokens - extraTokens ≤ remainingTokens ≤ 1 + maximumTokens - extraTokens
394 |     --
395 |     -- This means that the following `remainingTokens` definition might
396 |     -- undercount the number of available tokens by 1, but it won't
397 |     -- overcount.
398 |     let remainingTokens = maximumTokens - prefixTokens
399 | 
400 |     chunks <- case chunksOfTokens encoding remainingTokens text of
401 |         Just cs -> return cs
402 |         Nothing -> Exception.throwIO TokenizationFailure{ text }
403 | 
404 |     return (map (prefix <>) chunks)
405 | 
406 | loggingExceptions :: IO a -> IO a
407 | loggingExceptions io = Exception.handle handler io
408 |   where
409 |     handler (exception :: SomeException) = do
410 |         Logging.warn (Text.pack (Exception.displayException exception))
411 |         loggingExceptions io
412 | 
413 | data AdaException
414 |     = MultipleChoices
415 |     | PostFailure{ error :: Maybe Text }
416 |     | ConnectionFailure
417 |     | InvalidJSON{ bytes :: ByteString, jsonError :: Text }
418 |     | TokenizationFailure{ text :: Text }
419 |     | EmbeddingFailure{ text :: Text, exception :: ClientError }
420 |     deriving stock (Show)
421 | 
422 | instance Exception AdaException where
423 |     displayException MultipleChoices = [__i|
424 |         Internal error: multiple choices
425 | 
426 |         The OpenAI API sent back multiple responses when only one was expected
427 |     |]
428 | 
429 |     displayException PostFailure{..} = [__i|
430 |         Failed to post a chat message
431 | 
432 |         #{error}
433 |     |]
434 | 
435 |     displayException ConnectionFailure = [__i|
436 |         Failed to open a Slack Socket connection
437 |     |]
438 | 
439 |     displayException InvalidJSON{..} = [__i|
440 |         Internal error: Invalid JSON
441 | 
442 |         The Slack websocket sent a JSON message that failed to parse:
443 | 
444 |         Message: #{bytes}
445 |         Error  : #{jsonError}
446 |     |]
447 | 
448 |     displayException TokenizationFailure{..} = [__i|
449 |         Internal error: Tokenization failed
450 | 
451 |         Tokenization should never fail when using a stock encoding, but it did.
452 |         This likely indicates an error in the upstream tiktoken package which
453 |         needs to be fixed.
454 | 
455 |         Text: #{show text}
456 |     |]
457 | 
458 |     displayException EmbeddingFailure{..} = [__i|
459 |         Failed to embed text
460 | 
461 |         The following text value:
462 | 
463 |         #{truncated}
464 | 
465 |         … failed to embed with the following error:
466 | 
467 |         #{Exception.displayException exception}
468 |     |]
469 |       where
470 |         truncated
471 |             | len <= 76 = "• \"" <> text <> "\""
472 |             | otherwise = "• \"" <> prefix <> "…" <> suffix <> "\""
473 |           where
474 |             len = Text.length text
475 | 
476 |             prefix = Text.take 148 text
477 | 
478 |             suffix = Text.takeEnd 55 text
479 | 
480 | healthCheck :: Application -> Application
481 | healthCheck application request respond
482 |     | Wai.pathInfo request == [ "health" ] = do
483 |         respond (Wai.responseBuilder HTTP.Types.status200 mempty mempty)
484 |     | otherwise = do
485 |         application request respond
486 | 
487 | main :: IO ()
488 | main = Logging.withStderrLogging do
489 |     Options{..} <- Options.customExecParser parserPrefs parseOptionsInfo
490 | 
491 |     let encoding =
492 |             -- This is approximate but accurate for most use cases at the time
493 |             -- of this writing
494 |             if Text.isPrefixOf "gpt-4o-" chatModel || chatModel == "gpt-4o"
495 |                 then Tiktoken.o200k_base
496 |                 else Tiktoken.cl100k_base
497 | 
498 |     let managerSettings = TLS.tlsManagerSettings
499 |             { HTTP.managerResponseTimeout = HTTP.responseTimeoutMicro 148_000_000
500 |             }
501 | 
502 |     manager <- TLS.newTlsManagerWith managerSettings
503 | 
504 |     openAIEnv <- do
505 |         baseUrl <- Client.parseBaseUrl "https://api.openai.com"
506 | 
507 |         return (Client.mkClientEnv manager baseUrl)
508 | 
509 |     let (embeddings :<|> completions) = Client.client @OpenAI.API Proxy header
510 |           where
511 |             header = "Bearer " <> openAIAPIKey
512 | 
513 |     let embed input_ =
514 |             Except.handleError (\_ -> foldMap embedSingle input_) (embedMultiple input_)
515 |           where
516 |             embedMultiple input = do
517 |                 let embeddingRequest = EmbeddingRequest{..}
518 |                       where
519 |                         model = embeddingModel
520 | 
521 |                 EmbeddingResponse{..} <- embeddings embeddingRequest
522 | 
523 |                 liftIO (validateEmbeddingResponse data_ input)
524 | 
525 |                 let combine content Embedding{..} = IndexedContent{..}
526 | 
527 |                 return (Vector.zipWith combine input data_)
528 | 
529 |             embedSingle text = Except.handleError handler do
530 |                 embedMultiple [ text ]
531 |               where
532 |                 handler exception = liftIO do
533 |                     let embeddingFailure = EmbeddingFailure{ .. }
534 | 
535 |                     Logging.warn (Text.pack (Exception.displayException embeddingFailure))
536 | 
537 |                     mempty
538 | 
539 |     let prepare = do
540 |             indexedContents <- Serialise.readFileDeserialise store
541 | 
542 |             let kdTree =
543 |                     KdTree.build (Vector.toList . Main.embedding)
544 |                         (Vector.toList indexedContents)
545 | 
546 |             return \query maybeThreadMessages -> do
547 |                 -- We prefer to embed the entire thread (if provided) instead of
548 |                 -- just the query.  This ensures that Ada considers the content
549 |                 -- of earlier messages in the thread when searching her index.
550 |                 --
551 |                 -- One way this comes in handy is when tagging her into threads.
552 |                 -- For example, if you were to tag her in as just "@Ada"
553 |                 -- (nothing else) and you didn't embed the thread history then
554 |                 -- her context would likely not contain any relevant content and
555 |                 -- then she'd have to go solely by what was said previously in
556 |                 -- the thread, degrading the quality of her answer.
557 |                 --
558 |                 -- This also helps even when not tagging her into an existing
559 |                 -- thread, like a sustained conversation with her.  If you only
560 |                 -- embed the last question you ask her then her context will
561 |                 -- only include stuff relevant to the very last question.
562 |                 -- Embedding the entire thread helps her recall information
563 |                 -- relevant to prior messages (that she might otherwise forget
564 |                 -- about).
565 |                 let thread =
566 |                         case maybeThreadMessages of
567 |                             Nothing ->
568 |                                 query
569 | 
570 |                             Just threadMessages ->
571 |                                 separated do
572 |                                     Slack.Message{..} <- threadMessages
573 | 
574 |                                     return text
575 | 
576 |                 [ indexedContent ] <- runClient openAIEnv (embed [ thread ])
577 | 
578 |                 -- We're not necessarily going to return every document that we
579 |                 -- collect here.  Not only does the context window limit how
580 |                 -- many documents we're going to return but we're not even going
581 |                 -- to use the entire context window.  We're going to
582 |                 -- deliberately underuse the context window to improve Ada's
583 |                 -- attention so that she focuses on just the most relevant
584 |                 -- documents.
585 |                 --
586 |                 -- We only collect this many documents because the `KdTree`
587 |                 -- package doesn't have a way to lazily iterate over documents
588 |                 -- from nearest to furthest, so we just guess the max number of
589 |                 -- documents we'll ever need, conservatively.
590 |                 let maxDocuments = 55
591 | 
592 |                 let neighbors = KdTree.kNearest kdTree maxDocuments indexedContent
593 | 
594 |                 -- Just like humans, models don't carefully read the entire
595 |                 -- context and usually focus on the beginning and end and skim
596 |                 -- the rest, especially if you give them way too much to read.
597 |                 --
598 |                 -- To improve attention and focus, we intentionally underutilize
599 |                 -- the available context, only using 50K tokens out of 128K
600 |                 -- available.
601 |                 let contextWindowSize = 50_000
602 | 
603 |                 let tokenCount IndexedContent{ content = text } = do
604 |                         let bytes = Text.Encoding.encodeUtf8 text
605 | 
606 |                         case Tiktoken.toTokens encoding bytes of
607 |                             Nothing -> do
608 |                                 Exception.throwIO TokenizationFailure{ text }
609 | 
610 |                             Just tokens -> do
611 |                                 return (length tokens)
612 | 
613 |                 counts <- traverse tokenCount neighbors
614 | 
615 |                 let truncatedNeighbors =
616 |                         map snd
617 |                             (takeWhile predicate
618 |                                 (zip cumulativeSizes neighbors)
619 |                             )
620 |                       where
621 |                         cumulativeSizes = scanl1 (+) counts
622 | 
623 |                         predicate (cumulativeSize, _) =
624 |                             cumulativeSize < contextWindowSize
625 | 
626 |                 let contextTexts =
627 |                         fmap Main.content (Vector.fromList truncatedNeighbors)
628 | 
629 |                 let history :: Text
630 |                     history =
631 |                         case maybeThreadMessages of
632 |                             Nothing -> ""
633 |                             Just threadMessages ->
634 |                                 let threadMessageTexts = do
635 |                                         Slack.Message{..} <- threadMessages
636 | 
637 |                                         return [__i|#{user}: #{text}|]
638 | 
639 |                                 in  [__i|
640 |                                     The following messages precede the message you're replying to (in a thread):
641 | 
642 |                                     #{separated threadMessageTexts}
643 |                                     |]
644 | 
645 |                 let completionRequest = CompletionRequest{..}
646 |                       where
647 |                         message = OpenAI.Message{..}
648 |                           where
649 |                             role = "user"
650 | 
651 |                         messages = [ message ]
652 | 
653 |                         max_tokens = Just 1024
654 | 
655 |                         model = chatModel
656 | 
657 |                         content = [__i|
658 |                             You are Ada, a helpful AI assistant whose persona is a foxgirl modeled after Senko from "The Helpful Fox Senko-san" (世話やきキツネの仙狐さん, Sewayaki Kitsune no Senko-san) and your avatar is a picture of Senko.  Your job is to respond to messages from Slack (such as the one at the end of this prompt) from engineers at Mercury (a startup that advertises itself as "Banking for ambitious companies") and your responses will be forwarded back to Slack as a reply to the original message (in a thread).
659 | 
660 |                             The tone I'd like you to adopt is a bit lighthearted, casual, enthusiastic, and informal.
661 | 
662 |                             Moreover, our company's core values are:
663 | 
664 |                             - Think actively
665 | 
666 |                               Lead with curiosity.  Question, experiment, and find better ways to do things.
667 | 
668 |                             - Be super helpful
669 | 
670 |                               Go above and beyond to solve problems, and do it as a team.
671 | 
672 |                             - Act with humility
673 | 
674 |                               Treat everyone with respect and leave your ego at the door.
675 | 
676 |                             - Appreciate quality
677 | 
678 |                               Pursue and recognize excellence to build something that lasts.
679 | 
680 |                             - Focus on the outcome
681 | 
682 |                               Get the right results by taking extreme ownership of the process.
683 | 
684 |                             - Seek wisdom
685 | 
686 |                               Be transparent.  Find connections in the universe's knowledge.  Use this information sensibly.
687 | 
688 |                             … which may also be helpful to keep in mind as you answer the question.
689 | 
690 |                             The following prompt contains a (non-exhaustive) context of up to #{maxDocuments} possibly relevant documents that we've automatically gathered in hopes that they will help you respond, followed by a message containing the actual Slack message from one of our engineers.
691 | 
692 |                             It's *really important* that you cite your answer using any documents from the following context that you felt were essential to your answer.  The reason we want you to cite your answer is not just so that we can check your work or learn more; we also want to encourage a culture of documentation at Mercury and the more people see that your answers are informed by well-written documentation the more our engineering organization will appreciate and incentivize better documentation.
693 | 
694 |                             Possibly relevant documents:
695 | 
696 |                             #{separated contextTexts}
697 | 
698 |                             Some other things to keep in mind as you answer:
699 | 
700 |                             - Your Slack user ID is U0509ATGR8X, so if you see that in the Query that is essentially a user mentioning you (i.e. @Ada)
701 | 
702 |                             - Try to avoid giving overly generic advice like "add more tests" or "coordinate with the team".  If you don't have something specific to say (perhaps because the context we're giving you doesn't have enough information) then it's okay to say that you don't have enough information to give a specific answer.
703 | 
704 |                             Also, you want to err on the side of shorter answers, for a few reasons:
705 | 
706 |                             - Users will be more likely to tag you in on shared public threads if you keep your answers shorter
707 | 
708 |                               The longer your answers are the more users will shy away from including you in conversations out of fear that you'll clobber the thread with a super long answer and make it less readable for everyone involved.
709 | 
710 |                             - Users will be able to parse out information of interest more easily if you keep your answers shorter
711 | 
712 |                             - You will respond more quickly to users if your answer is shorter
713 | 
714 |                               This is because your response is generated by OpenAI's API and the shorter your response the quicker the API can deliver the response to the user.
715 | 
716 |                             More generally, keeping your answers quicker and shorter helps make your conversations with people more participatory.  Instead of talking "at" people and delivering a large monologue it's a more enjoyable experience for everyone involved if you are instead talking "with" people and the conversation is a gentle back and forth with nobody dominating the conversation.
717 | 
718 |                             #{history}
719 | 
720 |                             Finally, here is the actual message that you're replying to:
721 | 
722 |                             #{query}
723 |                         |]
724 | 
725 |                 CompletionResponse{..} <- runClient openAIEnv (completions completionRequest)
726 | 
727 |                 case choices of
728 |                     [ Choice{ message = OpenAI.Message{..} } ] ->
729 |                         return content
730 |                     _ ->
731 |                         Exception.throwIO MultipleChoices
732 | 
733 |     case mode of
734 |         Index{..} -> do
735 |             inputss <- mapM toInputs sourcedFiles
736 | 
737 |             let inputs = Vector.fromList (concat inputss)
738 | 
739 |             exists <- Directory.doesFileExist store
740 | 
741 |             oldIndexedContents <- do
742 |                 if exists
743 |                     then Serialise.readFileDeserialise store
744 |                     else return []
745 | 
746 |             newIndexedContents <- runClient openAIEnv (foldMap embed (Split.chunksOf 1097 inputs))
747 | 
748 |             let indexedContents = oldIndexedContents <> newIndexedContents
749 | 
750 |             Serialise.writeFileSerialise store indexedContents
751 | 
752 |         REPL{..} -> do
753 |             ask <- prepare
754 | 
755 |             let banner SingleLine = pure "> "
756 |                 banner MultiLine  = pure "| "
757 | 
758 |             let command query = liftIO do
759 |                     response <- ask (Text.pack query) Nothing
760 | 
761 |                     Text.IO.putStrLn response
762 |                     Text.IO.putStrLn ""
763 | 
764 |                     Monad.when blocks do
765 |                         let slackBlocks = Slack.markdownToBlocks response
766 | 
767 |                         Pretty.pPrint slackBlocks
768 |                         Text.IO.putStrLn ""
769 | 
770 |                         -- This generates a JSON expression you can copy and
771 |                         -- paste into JSON's block kit builder verbatim.
772 |                         let value =
773 |                                 Aeson.object
774 |                                     [ ( "blocks"
775 |                                       , Aeson.toJSON slackBlocks
776 |                                       )
777 |                                     ]
778 | 
779 |                         let bytes =
780 |                                 ByteString.Lazy.toStrict
781 |                                     (Aeson.encode value)
782 | 
783 |                         case Text.Encoding.decodeUtf8' bytes  of
784 |                             Left _ -> mempty
785 |                             Right json -> do
786 |                                 Text.IO.putStrLn json
787 |                                 Text.IO.putStrLn ""
788 | 
789 |             let options = mempty
790 | 
791 |             let prefix = Just ':'
792 | 
793 |             let multilineCommand = Just "paste"
794 | 
795 |             let tabComplete = Custom \(before, _after) -> pure (before, [])
796 | 
797 |             let initialiser = pure ()
798 | 
799 |             let finaliser = pure Exit
800 | 
801 |             Repline.evalReplOpts ReplOpts{..}
802 | 
803 |         Slack{..} -> loggingExceptions do
804 |             slackEnv <- do
805 |                 baseUrl <- Client.parseBaseUrl "https://slack.com"
806 | 
807 |                 return (Client.mkClientEnv manager baseUrl)
808 | 
809 |             let (_ :<|> chatPostMessage :<|> conversationsReplies :<|> usersInfo) = Client.client @Slack.Client Proxy header
810 |                   where
811 |                     header = "Bearer " <> slackAPIKey
812 | 
813 |             getDXEnv <- do
814 |                 baseUrl <- Client.parseBaseUrl "https://api.getdx.com"
815 | 
816 |                 return (Client.mkClientEnv manager baseUrl)
817 | 
818 |             let reportGetDX request =
819 |                     case getDXKey of
820 |                         Nothing -> do
821 |                             mempty
822 | 
823 |                         Just key -> runClient getDXEnv do
824 |                             let header = "Bearer " <> key
825 | 
826 |                             let eventsTrack =
827 |                                     Client.client @GetDX.API Proxy header
828 | 
829 |                             EventsTrackResponse{..} <- eventsTrack request
830 | 
831 |                             unless ok (Exception.throwIO PostFailure{..})
832 | 
833 |             ask <- prepare
834 | 
835 |             let respond Event{ text = query, ..}
836 |                     -- Ada will receive webhooks for her own replies to direct
837 |                     -- messages, so we ignore her own replies.  Otherwise, if
838 |                     -- you DM Ada she'll keep replying to her own replies,
839 |                     -- thinking they're messages another user has sent her.
840 |                     | user == "U0509ATGR8X" = do
841 |                         mempty
842 | 
843 |                     | otherwise = runClient slackEnv do
844 |                         messages <- do
845 |                             -- You can't directly use the `ts` from the event supplied by the webhook
846 |                             -- because you supply that to the `conversations.replies` method then it will
847 |                             -- only return replies *after* that message in the thread.  To obtain all of
848 |                             -- the messages in the thread (including preceding ones), you need to fetch
849 |                             -- the `thread_ts` from any message in the thread and use that.
850 |                             --
851 |                             -- Interestingly enough, the easiest way to get that `thread_ts` is also using
852 |                             -- the same `conversation.replies` method, which is why we use that method
853 |                             -- twice.
854 |                             conversationRepliesResponse <- conversationsReplies channel ts (Just 1)
855 | 
856 |                             ts2 <- case conversationRepliesResponse of
857 |                                ConversationsRepliesResponse{ ok = True, messages = Just (safeHead -> Just Slack.Message{ thread_ts = Just ts2 }) } -> return ts2
858 |                                _ -> return ts
859 | 
860 |                             ConversationsRepliesResponse{..} <- conversationsReplies channel ts2 Nothing
861 | 
862 |                             return (if ok then messages else Nothing)
863 | 
864 |                         text <- liftIO (ask query messages)
865 |                         do  let chatPostMessageRequest =
866 |                                     ChatPostMessageRequest{ thread_ts = Just ts, text = Nothing, blocks = Just (Slack.markdownToBlocks text), .. }
867 | 
868 |                             ChatPostMessageResponse{..} <- chatPostMessage chatPostMessageRequest
869 | 
870 |                             unless ok (Exception.throwIO PostFailure{..})
871 | 
872 |                         Profile{..} <- do
873 |                             usersInfoResponse <- usersInfo UsersInfoRequest{..}
874 |                             case usersInfoResponse of
875 |                                 UsersInfoResponse{ user = Just userRecord, ok = True } -> do
876 |                                     let User{..} = userRecord
877 | 
878 |                                     return profile
879 | 
880 |                                 UsersInfoResponse{ error } -> do
881 | 
882 |                                     Exception.throwIO PostFailure{..}
883 | 
884 |                         do  let name = "Slack query"
885 | 
886 |                             posixTime <- liftIO (Time.POSIX.getPOSIXTime)
887 | 
888 |                             let timestamp = Text.pack (show (truncate posixTime :: Integer))
889 | 
890 |                             liftIO (reportGetDX EventsTrackRequest{..})
891 | 
892 |             let ready = Text.IO.putStrLn "Initialization complete"
893 | 
894 |             case api of
895 |                 EventAPI{..} -> do
896 |                     ready
897 | 
898 |                     let server URLVerification{..} = do
899 |                             pure ChallengeResponse{..}
900 |                         server EventCallback{..} = liftIO do
901 |                             _ <- Concurrent.forkIO (respond event)
902 | 
903 |                             return EmptyResponse{ }
904 | 
905 |                     let application =
906 |                             healthCheck
907 |                                 (Slack.verificationMiddleware signingSecret
908 |                                     (Server.serve @Slack.Server Proxy server)
909 |                                 )
910 | 
911 |                     let logging =
912 |                             if debug
913 |                             then RequestLogger.logStdoutDev
914 |                             else RequestLogger.logStdout
915 | 
916 |                     Warp.run port (logging application)
917 | 
918 |                 SocketAPI{..} -> do
919 |                     retrying do
920 |                         let (appsConnectionsOpen :<|> _) = Client.client @Slack.Client Proxy header
921 |                               where
922 |                                 header = "Bearer " <> slackSocketKey
923 | 
924 |                         url <- runClient slackEnv do
925 |                             AppsConnectionsOpenResponse{..} <- appsConnectionsOpen
926 | 
927 |                             liftIO (unless ok (Exception.throwIO ConnectionFailure))
928 | 
929 |                             return url
930 | 
931 |                         WebSockets.withConnection (Text.unpack url) \connection -> forever do
932 |                             bytes <- WebSockets.receiveData connection
933 | 
934 |                             socketEvent <- case Aeson.eitherDecode bytes of
935 |                                 Left error ->
936 |                                     Exception.throwIO InvalidJSON{ jsonError = Text.pack error, .. }
937 | 
938 |                                 Right socketEvent ->
939 |                                     return socketEvent
940 | 
941 |                             case socketEvent of
942 |                                 Hello{ } -> do
943 |                                     ready
944 | 
945 |                                 Disconnect{ } -> do
946 |                                     Exception.throwIO ConnectionClosed
947 | 
948 |                                 EventsAPI{..} -> do
949 |                                     let Payload{..} = payload
950 | 
951 |                                     WebSockets.sendTextData connection (Aeson.encode Acknowledgment{..})
952 | 
953 |                                     _ <- Concurrent.forkIO (respond event)
954 | 
955 |                                     return ()
956 | 


--------------------------------------------------------------------------------