├── .env.example ├── .gitignore ├── .gitmodules ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── SETUP.md ├── commands.txt ├── docker-compose.yaml ├── hasura_discord_postgres ├── data │ ├── create.sql │ └── seed.sql └── postgres │ ├── config.yaml │ ├── metadata │ ├── actions.graphql │ ├── actions.yaml │ ├── allow_list.yaml │ ├── api_limits.yaml │ ├── backend_configs.yaml │ ├── cron_triggers.yaml │ ├── databases │ │ ├── databases.yaml │ │ └── postgres │ │ │ └── tables │ │ │ ├── public_COLLECTION_ENUM.yaml │ │ │ ├── public_configuration.yaml │ │ │ ├── public_guild_forums.yaml │ │ │ ├── public_message.yaml │ │ │ ├── public_thread.yaml │ │ │ └── tables.yaml │ ├── graphql_schema_introspection.yaml │ ├── inherited_roles.yaml │ ├── metrics_config.yaml │ ├── network.yaml │ ├── opentelemetry.yaml │ ├── query_collections.yaml │ ├── remote_schemas.yaml │ ├── rest_endpoints.yaml │ └── version.yaml │ └── migrations │ └── postgres │ └── 1708450043631_init │ └── up.sql ├── hasura_discord_qdrant ├── config.yaml └── snapshots │ └── v3.snapshot ├── images ├── image1.gif ├── image10.png ├── image11.png ├── image12.png ├── image13.gif ├── image2.gif ├── image3.png ├── image4.png ├── image5.png ├── image6.png ├── image7.png ├── image8.png ├── image9.png └── setup │ ├── add_bot_to_server.png │ ├── bot_message.png │ ├── bot_settings.png │ ├── bot_talk.png │ ├── commands.png │ ├── forum_channel.png │ ├── generated_url.png │ ├── hasura_console.png │ ├── hello.png │ ├── hello_world.png │ ├── logging_channel.png │ ├── name_application.png │ ├── new_application.jpeg │ ├── qdrant_dashboard.png │ ├── redoc.png │ ├── reset_token.jpeg │ ├── role_id.png │ ├── search.png │ ├── search_results.png │ ├── server_id.png │ ├── server_settings.png │ ├── set_scopes.png │ ├── swagger.png │ ├── test_post.png │ └── token.png ├── seed.json └── seed.py /.env.example: -------------------------------------------------------------------------------- 1 | POSTGRES_PASSWORD=postgrespassword 2 | 
POSTGRES_URL=postgres://postgres:postgrespassword@postgres:5432/postgres 3 | BACKEND_URL=http://hasura_discord_backend:8100 4 | HASURA_GRAPHQL_URL=http://graphql-engine:8080/v1/graphql 5 | HASURA_GRAPHQL_ADMIN_SECRET=secret 6 | SEARCH_ENDPOINT_URL=http://hasura_discord_backend:8100/search/ 7 | BACKEND_API_KEY_HEADER_NAME=X-API-KEY 8 | BACKEND_API_KEY=secret 9 | QDRANT_URL=http://qdrant:6333 10 | QDRANT_API_KEY=secret 11 | OPENAI_MODEL=gpt-4-turbo-preview 12 | OPENAI_EMBEDDING_MODEL=text-embedding-3-large 13 | VECTOR_SIZE=3072 14 | GUILD_ID= 15 | DISCORD_CLIENT_SECRET= 16 | OPENAI_API_KEY= 17 | OPENAI_ORGANIZATION= 18 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | venv/ 2 | .idea/ 3 | .DS_Store 4 | .env -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "hasura_discord_backend"] 2 | path = hasura_discord_backend 3 | url = https://github.com/hasura/hasura_discord_backend.git 4 | [submodule "hasura_discord_bot"] 5 | path = hasura_discord_bot 6 | url = https://github.com/hasura/hasura_discord_bot.git 7 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Hello! 👋 2 | 3 | We're glad you are here! 4 | 5 | If you are interested in contributing, please feel free to fork the repo, create a branch, and open a PR. 6 | 7 | If your PR has something interesting or useful, it'll get merged! Feel free to reach out in the Hasura Discord Channel 8 | to discuss ideas. We will add onto this page if there is interest. 
9 | 10 | I'll certainly be adding things to this bot, I hope to make it a bit of a QA-hub and add support for things like 11 | Zendesk, Slack, and maybe even create threads from StackOverflow and Reddit as well. 12 | 13 | Some things I'm interested in doing: 14 | 15 | * Add user tracking, and add context to the bot on who the users are 16 | * Use the user tracking to implement a leaderboard 17 | * Instead of only marking something as solved, allow also the marking of specific messages as helpful or non-helpful 18 | * Add a slash command for generating a "golden thread". I.e. Convert the conversation into a perfect conversation based 19 | on the determined final answer. Perhaps... 20 | 1. The bot uses the entire thread to create questions to get needed details the user reveals throughout the 21 | conversation. 22 | 2. The bot generates the answers to those questions, pretending to be the user. 23 | 3. The bot generates the final answer having got the important information from the user. 24 | 4. This new thread is used in the future to fine-tune the bot. 25 | * The current database design is very simple. It would be nice to decouple things from Discord and add an enum so that 26 | the same underlying chatbot could work with multiple tools. I.e. a source field on the thread and each bot can have 27 | its own task_loop. 28 | * It would be good to add some commands/crons for report generation, especially pretty PDFs with charts 29 | * We should automate the vectorization of conversations and things, and also have 1 collection with ALL incoming info in 30 | it aggregated. 31 | * It might be good to do things like use ChatGPT to curate tags and create enriched metadata for the data-points. 32 | * It would be good to make it so that prompts and more of the "constants" are stored in the database. This would make it 33 | simpler to do things like change the prompts for example. 34 | * It would be good to be able to benchmark against different prompts. 
35 | * It would be good to add a front-end wrapper with a purpose-designed application rather than having to deal with 36 | external APIs. 37 | * There are lots of opportunities to add slash commands and things. 38 | 39 | We host office hours every Tuesday at 10AM PST, (As in me! The guy who wrote this whole thing, *I* host the Office 40 | hours, so come keep me company!) 41 | 42 | [Join our Discord server](https://discord.gg/hasura) -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 
30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 
62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # A Discord Bot to converse with documentation using GPT-4 + RAG 2 | 3 | [This repository](https://github.com/hasura/hasura-discord-docs-bot) contains the code for a Discord bot. There is a sprinkle of Nginx, a dash of Qdrant, some PostgreSQL, of course, Hasura, discord.py, a smidge of selenium and beautifulsoup, a pinch of FastAPI, and a lot of (mostly) production-ready Python with only a few dragons. 🐲 4 | 5 | What might this bot do you wonder? I shall tell you! It's magic. 🧙‍♂️🪄 6 | 7 | This Discord bot provides the ability to search and talk to technical documentation, or in theory any documentation. 8 | 9 | I needed to write both a README and a blog post, so let's double up – such is life. 10 | 11 | If you're more interested in running the bot yourself, see [the adjacent SETUP.md file](https://github.com/hasura/hasura-discord-docs-bot/blob/main/SETUP.md). 
12 | 13 | If you work for or use Hasura, ClickHouse, Turso, Fly.io, or Ionic, this bot will be useful to you, so come try it. If you use [Docusaurus](https://docusaurus.io/blog/releases/3.1), then the provided web-scraper should work for you. (Although Algolia has me curious, I found it halfway through this project. Does anyone know if it's any good? Can I get it to spit out Markdown?) 14 | 15 | Also, I've always been a production or GTFO kind of person, therefore the database I've deployed for this has a READONLY api-key. The bot runs on the [Hasura Discord server](https://discord.gg/hasura). Just use `/commands` in a Discord channel to see the capabilities. To converse with the bot, ask a Hasura-related question in either the [v2-help-forum](https://discord.com/channels/407792526867693568/1205630815690817536) or the [v3-help-forum](https://discord.com/channels/407792526867693568/1205357708677480468). 16 | 17 | Feel free to go over to [the production Qdrant dashboard](https://hasura-bots.com/dashboard) and use the following READONLY API key ```95a7cc2e3087b032edd2fd71de5b3a8d48313d601e6ce70b538ce5c8f730d93d``` to peruse the vector database points. (Hint: It is a database full of embeddings of scraped technical documentation written in markdown.) 
18 | 19 | **Watch the below video to see how it works** 20 | 21 | [![Video](https://img.youtube.com/vi/wjZ4eOlNJqw/maxresdefault.jpg)](https://www.youtube.com/watch?v=wjZ4eOlNJqw) 22 | 23 | ### Table of Contents 24 | 25 | * [Beyond the Page: Crafting Our Own Conversational Google](#beyond-the-page-crafting-our-own-conversational-google) 26 | * [The Motivation for This Project](#the-motivation-for-this-project) 27 | * [Part 1: Scraping the Documentation](#part-1-scraping-the-documentation) 28 | * [Part 2: Deploying the Database](#part-2-deploying-the-database) 29 | * [Part 3: Designing the PostgreSQL Database](#part-3-designing-the-postgresql-database) 30 | * [Part 4: The Backend API](#part-4-the-backend-api) 31 | * [The `/upload_documents` Endpoint](#the-upload_documents-endpoint) 32 | * [The `/new_message_event` Endpoint](#the-new_message_event-endpoint) 33 | * [Part 5: Building the Bot](#part-5-building-the-bot) 34 | * [Part 6: Talking with the Bot](#part-6-talking-with-the-bot) 35 | * [Some Final Thoughts](#some-final-thoughts) 36 | 37 | ![ChatBot GIF](images/image1.gif "ChatBot GIF") 38 | 39 | ### **Beyond the page: Crafting our own conversational Google** 40 | 41 | A long time ago, these guys named Larry and Sergey decided to scrape the internet to organize the world's information, and it was hard. They also organized it about as well as a 5 year old might organize their chest of toys, by generally doing their best to throw similar things into the same bucket from across the room via a giant firehose. 
42 | 43 | See, they didn't have [Word2Vec](https://en.wikipedia.org/wiki/Word2vec), and they definitely didn't have the new `text-embedding-3-large` embedding-model from OpenAI, or the `gpt-4-turbo-preview`, also known as `gpt-4-0125-preview`, which [as OpenAI puts it is](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo): "The latest GPT-4 model intended to reduce cases of “laziness” where the model doesn’t complete a task and returns a maximum of 4,096 output tokens" (which by the way has a 128,000 token context window). 44 | 45 | What a perfect and beautiful storm. It's not that the spider algorithm wasn't great for crawling the 🕸. I guess you could say the early Google team just didn't have a 🧠. Wait... no, that didn't come out right. What I mean to say is, using AI, we can sieve the data that comes through the firehose as it comes through, and create semantic embeddings via word2vec. And _then_ create our own purpose-specific mini-Google that comes with a support bot, kind of like Clippy, Siri, or Alexa (but if any of those were actually good), and currently minus the voice activation. But I could look into adding that, all that would take is a pinch of ffmpeg after all. 46 | 47 | 48 | ### **The motivation for this project** 49 | 50 | We should be able to completely automate every single support request that comes in that could be solved if the user had thoroughly read the entirety of the documentation. 51 | 52 | For every request that _can't_ be solved via an automated response, when a community member or support staff comes in and solves it, we can use that to train the AI for future iterations, and perhaps use the conversations to generate new or additional documentation.💡 53 | 54 | A tool such as this makes support an even more vital and helpful role, as it creates incentives to find correct reproducible solutions over quick solutions – even though currently support roles often are called in when things are on fire and people are in panic mode. 
This motivates all team members to properly solve each problem because all problems only need to be solved one time. 55 | 56 | Another bonus: By providing a searchable history of all previous inquiries, you can build the help you want to receive. I've not yet fine-tuned this bot, because I need people to create and vote on inquiries to collect the data. Conversational-style threads will be best, and it might be nice to curate the threads after the fact with a GPT comb-over. The result? 57 | 58 | * One model fine-tuned to editorialize the conversation. 59 | * One model fine-tuned to create lists of disparities in docs. 60 | * One model fine-tuned to take all that information and pop out updated documentation. 61 | 62 | In this scenario, people become curators of information rather than rote secretaries. Information doesn't get lost, and you can copy customer-interaction flows. Almost like a compile step. Imagine taking 10 models and layering them to chunk down information. If you build good datasets of the workflow in action, you could revolutionize the way information moves. 63 | 64 | It's a very different world today than it was when Google was first created. In the modern world, we have lots of very powerful tools at our disposal and the chips in our home computers are multiple orders of magnitude more capable than they were 25 years ago. 65 | 66 | A new age, a new era is upon us! To truly grasp what I'm getting at, look at how you can now visually identify semantic gaps in your documentation. 67 | 68 | You can understand where a user might get confused – just by looking at points that are far apart, or by not having a decent path to walk, or perhaps the path is just too long. Visualize the breadcrumbs and trails users will walk when they desire to do certain actions or have specific use cases and understand how you can support them before they even ask the questions. 
69 | 70 | 71 | ![alt_text](images/image2.gif "image_tooltip") 72 | 73 | 74 | So without further ado, let's dive in. I've broken this into sections. It's nitty-gritty technical, but if you've made it this far, you're clearly my type of reader, so hang on, we’re going deep. 75 | 76 | 77 | # Part 1: Scraping the documentation 78 | 79 | Why build a scraper? Because then I could use it on things other than documentation, in theory. It's a proof-of-concept – I built this whole thing in a week, so let's not judge too harshly. 80 | 81 | I started by building a quick-and-dirty ChatGPT collaborative hack-n-slash script that consisted of me running the script, seeing it get snagged or formatting something in a way I didn't like, as it would write each page to a Markdown file, point the scraper at that page, then copy-paste the exception and the html, and occasionally hand-guide it a bit by inspecting the page and doing some beautifulsoup magic. 82 | 83 | It was mostly ChatGPT doing the dirty work and me trying to make it work as fast as I possibly could (I couldn't really justify spending more than a day on the scraper). 84 | 85 | The scraper ended up being very specific to Docusaurus. I thought about trying to scrape other documentation sites, but then realized... Well, everybody's docs for the most part can be represented or easily turned into Markdown, that's on other people. If you want your docs added, send me a script to scrape your docs and structure them as Markdown, however you want them to appear. 86 | 87 | Some people have asked me, “But Tristen, why scrape the docs?” I wanted fully qualified links, not relative links, and I really wanted to test out how things worked with different companies' documentation, as well. Plus, don't hate on the selenium + bs4 combo – we use all kinds of tools internally. 
I figured the semantic search results with hyperlinks to the document were essentially a super-powered Google search, and when I fed the results to the latest GPT model the outputs were pretty great. The citation of sources is nice. What's nicer is being able to re-run the scraper to update things. 88 | 89 | Although I may have accidentally taken down the [reactnative.dev docs site](https://reactnative.dev) for a good 10 minutes when I forgot to change the sleep time from 0.01 seconds to 0.5 seconds, as scraping the [fly.io](https://fly.io/docs/) docs literally FLEW. 😅 ReactNative docs _would have_ been included in this project, if I wasn't scared that attempting to scrape them when they came back up would get me blacklisted. (I'd be lying if I said I've never taken down a website before, but always by accident, and always because when you web-scrape you have to choose whether you want to be polite and patient, or rude but impatient, and sometimes my ADHD gets the best of me and I get impatient.) 90 | 91 | In the end, I wanted my scraped docs to be as human-readable as possible while also being as bot-friendly as possible. Even complex tables and things. I ended up with something like this, which I think is pretty nice. Notice how even the links inside the tables work and are fully qualified. 92 | 93 | [Here’s the actual page for the below Markdown.](https://hasura.io/docs/latest/databases/feature-support/) 94 | 95 | ```markdown 96 | # Database Feature Support 97 | 98 | The below matrices show the database wise support for the different GraphQL features under schema, queries, mutations, and subscriptions. 99 | 100 | Tip 101 | 102 | Each ✅ below links **directly** to the feature within a particular type of database. 
103 | 104 | ## Schema​ 105 | 106 | | | Postgres | Citus | SQL Server | BigQuery | CockroachDB | CosmosDB | 107 | |---|---|---|---|---|---|---| 108 | | Table Relationships | [ ✅ ](https://hasura.io/docs/latest/schema/postgres/table-relationships/index/) | [ ✅ ](https://hasura.io/docs/latest/schema/postgres/table-relationships/index/) | [ ✅ ](https://hasura.io/docs/latest/schema/ms-sql-server/table-relationships/index/) | [ ✅ ](https://hasura.io/docs/latest/schema/bigquery/table-relationships/index/) | [ ✅ ](https://hasura.io/docs/latest/schema/postgres/table-relationships/index/) | [ ✅ ](https://hasura.io/docs/latest/schema/postgres/table-relationships/index/) | 109 | | Remote Relationships | [ ✅ ](https://hasura.io/docs/latest/schema/postgres/remote-relationships/index/) | [ ✅ ](https://hasura.io/docs/latest/schema/postgres/remote-relationships/index/) | [ ✅ ](https://hasura.io/docs/latest/schema/ms-sql-server/remote-relationships/index/) | [ ✅ ](https://hasura.io/docs/latest/schema/bigquery/index/) | [ ❌ ](https://hasura.io/docs/latest/databases/postgres/cockroachdb/hasura-cockroachdb-compatibility/#relationships) | [ ✅ ](https://hasura.io/docs/latest/schema/postgres/remote-relationships/index/) | 110 | | Views | [ ✅ ](https://hasura.io/docs/latest/schema/postgres/views/) | [ ✅ ](https://hasura.io/docs/latest/schema/postgres/views/) | [ ✅ ](https://hasura.io/docs/latest/schema/ms-sql-server/views/) | ✅ | [ ✅ ](https://hasura.io/docs/latest/schema/postgres/views/) | [ ✅ ](https://hasura.io/docs/latest/schema/postgres/views/) | 111 | | Custom Functions | [ ✅ ](https://hasura.io/docs/latest/schema/postgres/custom-functions/) | [ ✅ ](https://hasura.io/docs/latest/schema/postgres/custom-functions/) | ❌ | ❌ | [ ❌ ](https://hasura.io/docs/latest/databases/postgres/cockroachdb/hasura-cockroachdb-compatibility/#functions) | [ ✅ ](https://hasura.io/docs/latest/schema/postgres/custom-functions/) | 112 | | Enums | [ ✅ ](https://hasura.io/docs/latest/schema/postgres/enums/) | 
[ ✅ ](https://hasura.io/docs/latest/schema/postgres/enums/) | ❌ | ❌ | [ ✅ ](https://hasura.io/docs/latest/schema/postgres/enums/) | ❌ | 113 | | Computed Fields | [ ✅ ](https://hasura.io/docs/latest/schema/postgres/computed-fields/) | [ ❌ ](https://hasura.io/docs/latest/databases/postgres/citus-hyperscale-postgres/hasura-citus-compatibility/#computed-fields) | ❌ | [ ✅ ](https://hasura.io/docs/latest/schema/bigquery/computed-fields/) | [ ❌ ](https://hasura.io/docs/latest/databases/postgres/cockroachdb/hasura-cockroachdb-compatibility/#functions) | ❌ | 114 | | Data Validations | [ ✅ ](https://hasura.io/docs/latest/schema/postgres/data-validations/) | [ ✅ ](https://hasura.io/docs/latest/schema/postgres/data-validations/) | ✅ | [ ✅ ](https://hasura.io/docs/latest/schema/bigquery/data-validations/) | [ ✅ ](https://hasura.io/docs/latest/schema/postgres/data-validations/) | [ ✅ ](https://hasura.io/docs/latest/schema/postgres/data-validations/) | 115 | | Relay Schema | [ ✅ ](https://hasura.io/docs/latest/schema/postgres/relay-schema/) | [ ✅ ](https://hasura.io/docs/latest/schema/postgres/relay-schema/) | ❌ | ❌ | ❌ | [ ✅ ](https://hasura.io/docs/latest/schema/postgres/relay-schema/) | 116 | | Naming Conventions | [ ✅ ](https://hasura.io/docs/latest/schema/postgres/naming-convention/) | [ ❌ ](https://hasura.io/docs/latest/databases/postgres/citus-hyperscale-postgres/hasura-citus-compatibility/#naming-conventions) | ❌ | ❌ | [ ❌ ](https://hasura.io/docs/latest/databases/postgres/cockroachdb/hasura-cockroachdb-compatibility/#naming-conventions) | ❌ | 117 | | Custom Fields | [ ✅ ](https://hasura.io/docs/latest/schema/postgres/custom-field-names/) | [ ✅ ](https://hasura.io/docs/latest/schema/postgres/custom-field-names/) | [ ✅ ](https://hasura.io/docs/latest/schema/ms-sql-server/custom-field-names/) | [ ✅ ](https://hasura.io/docs/latest/schema/bigquery/custom-field-names/) | [ ✅ ](https://hasura.io/docs/latest/schema/postgres/custom-field-names/) | [ ✅ 
](https://hasura.io/docs/latest/schema/postgres/custom-field-names/) | 118 | | Default Values | [ ✅ ](https://hasura.io/docs/latest/schema/postgres/default-values/index/) | [ ✅ ](https://hasura.io/docs/latest/schema/postgres/default-values/index/) | [ ✅ ](https://hasura.io/docs/latest/schema/ms-sql-server/default-values/index/) | ❌ | [ ✅ ](https://hasura.io/docs/latest/schema/postgres/default-values/index/) | [ ✅ ](https://hasura.io/docs/latest/schema/postgres/default-values/index/) | 119 | ``` 120 | 121 | 122 | 123 | ![alt_text](images/image3.png "image_tooltip") 124 | 125 | 126 | 127 | # Part 2: Deploying the database 128 | 129 | I did this last, but I'll talk about it second. I developed with Qdrant running in Docker locally, but indeed, I did need to deploy it. 130 | 131 | I deployed Qdrant on an E2-medium with 4GB RAM and 2vCPUs and slapped a 20TB SSD on as my boot drive. It was just a pretty basic Debian-12 bookworm VM. I was trying to be as lazy as I possibly could about it and not even register a domain, and I would've got away with it too, if it weren't for that meddling Qdrant client being picky about rejecting self-signed certs. I did suck it up and register a domain name, add the DNS records, and get signed by a CA, as seen by the aforementioned site: [https://hasura-bots.com/dashboard](https://hasura-bots.com/dashboard). 132 | 133 | I guess we should start from the beginning: Thankfully I pulled down the command history to do just that via `history > commands.txt`. 134 | 135 | When working with a new machine, first I like to get acquainted with it, it's kind of like meeting an old friend. This used to mean using some combo of `ls`, `pwd`, and `cd ..` or `cd some_dir` until I found a good path to call home and set up shop. 
But nowadays I can copy-paste some random statistics about my machine from the Google Cloud Console into ChatGPT, and like the hero of the story that it is, it lets me know lots of fancy commands that print out things I can at this point somewhat kind of decipher enough about the outputs to figure out if it’s all systems go, or if everything is broken and I need to go turn it off and then on again, and after like three tries doing that if nothing else works just nuke it and switch to Ubuntu. 136 | 137 | Hello Machine, it's nice to meet you! 138 | 139 | ```shell 140 | sudo apt-get update 141 | cat /etc/os-release 142 | uname -r 143 | lscpo 144 | lscpu 145 | df -h 146 | ``` 147 | 148 | The next thing any good lazy person does is install Docker. 149 | 150 | ```shell 151 | sudo apt install docker.io 152 | sudo systemctl enable --now docker 153 | sudo usermod -aG docker $USER 154 | newgrp docker 155 | docker --version 156 | docker run hello-world 157 | ``` 158 | 159 | 160 | I think ChatGPT had intuited that I'd need to open ports, and so I gave its next suggestion a go, but I was pretty sure my next step would be to go to my firewall rules since I'm not actually running on metal, and I'm inside a VM... 161 | 162 | ```shell 163 | tristen_harr@hasura-bots-qdrant:~$ sudo ufw allow 22 164 | sudo: ufw: command not found 165 | ``` 166 | 167 | 168 | And I was right! Indeed, I was not using Uncomplicated Firewall, so as I had expected would happen, ChatGPT led me astray. But since I still knew where I was and where I needed to go, I could take over from the autopilot for a bit. 
169 | 170 | I booted the Docker container… 171 | 172 | ```shell 173 | docker pull qdrant/qdrant 174 | docker run -p 6333:6333 -p 6334:6334 -v $(pwd)/qdrant_storage:/qdrant/storage:z qdrant/qdrant 175 | ``` 176 | 177 | and to my great delight… 178 | 179 | ```shell 180 | _ _ 181 | __ _ __| |_ __ __ _ _ __ | |_ 182 | / _` |/ _` | '__/ _` | '_ \| __| 183 | | (_| | (_| | | | (_| | | | | |_ 184 | \__, |\__,_|_| \__,_|_| |_|\__| 185 | |_| 186 | 187 | Access web UI at http://0.0.0.0:6333/dashboard 188 | ``` 189 | 190 | 191 | I had Qdrant running with absolutely no security whatsoever in my VM, so I opened my firewall to `tcp:6333`. I was able to go to the IP, and of course got the "insecure" warning you can click around and voilà – I had the Qdrant dashboard. No domain-name, just going directly to the IP and port: `my.ip.address:6333/dashboard`. 192 | 193 | Here is an obligatory photo of the insecure dashboard: 194 | 195 | 196 | ![alt_text](images/image4.png "image_tooltip") 197 | 198 | 199 | I thought, hooray, we're done for now, I'll just deal with this Monday and buy a domain name then! But I did need to get encrypted traffic. No biggie though, I'll self-sign, I tell myself. So I self-sign and then go about wading through the [authentication configuration info for Qdrant](https://qdrant.tech/documentation/guides/security/). I wasn't worried so much about obtaining the ever-so-precious lock. 🔒I know I'm not malicious traffic, I built every part of this from the ground up, so no worries there, and I figured openssl would do just fine. 
200 | 201 | ```shell 202 | sudo apt-get install openssl 203 | openssl req -x509 -newkey rsa:4096 -keyout key.pem -out cert.pem -days 365 -nodes -subj "/CN=my.public.ip.address" 204 | mkdir qdrant 205 | cd qdrant 206 | touch config.yaml 207 | vi config.yaml 208 | ``` 209 | 210 | 211 | I don't have the vi history as I went to configure Qdrant, but long story short, I configured the stuff, self-signed, got things working over http, but then the API I'd already integrated and did not feel like ripping out to replace with an http request yelled at me about the certs. Oops. 212 | 213 | The problem was that the client didn't like that I'd self-signed and didn't have a CA, which came back to bite me, when ultimately Let's Encrypt then refused to sign a certificate with the CA name pointed at an IP address. So I gave in and did things properly and bought a domain [hasura-bots.com](https://hasura-bots.com/dashboard), then configured the Cloud DNS with an A record pointing at my VM's public IP. 214 | 215 | 216 | ![alt_text](images/image5.png "image_tooltip") 217 | 218 | 219 | Then I did the certbot challenges to get a certificate that was signed by a CA. In the end, ChatGPT did spit out a fancy docker-compose that seemed to spin things up and supposedly would renew the certs for me. Although, I still probably ought to go back and reduce that TTL… there's always more to do in retrospect, isn't there? Oh well, for now, it'll stay short in case I need to make changes, or at least that's what I'll tell myself, so I can sleep at night. 220 | 221 | Database deployment accomplished! Boss battle ends in victory, XP, and blog-post fodder galore! 222 | 223 | (All in all, I'm making this sound much easier than it actually was. No matter how anybody makes it sound, this stuff isn't easy, as evidenced by the commands.txt file, which I dumped my command line history to when I started writing this post. 
It took me 327 individual commands to complete this task, although it's mostly still `ls` and `cd some_dir`. Deploying the database for production is often like the final boss battle of a project, in my opinion, especially if it's a database you've never deployed before. Anyone who tells you it's easy is either lying or a much higher-level wizard than I am. And FYI using a fully managed DBaaS doesn't count as deploying a database!) 224 | 225 | 226 | # Part 3: Designing the PostgreSQL database 227 | 228 | I work for Hasura, and our company has built a pretty fantastic tool that I love to use when I'm building pretty much anything. This project is as simple as an event-driven project can be. There are five database tables, and one of them is a Hasura ENUM. I spun up a PostgreSQL database on Google's CloudSQL and created a new project on Hasura Cloud, and got down to work. This database was much easier to deploy, it’s just a few button clicks. 229 | 230 | The first table, the aptly titled `COLLECTION_ENUM`, tracks the Qdrant collections we currently have in our Qdrant database. 231 | 232 | 233 | ![alt_text](images/image6.png "image_tooltip") 234 | 235 | 236 | 237 | ![alt_text](images/image7.png "image_tooltip") 238 | 239 | 240 | The entries in this table map one-to-one with a document collection. The document collections were uploaded using a script that took the scraped documents, and utilized an endpoint in the `hasura_discord_backend` directory, but we'll get to that in a bit. Let's first view the forest, and I promise that soon we shall walk amongst the trees. 241 | 242 | The idea for the main loop of the bot is simple, we have a help forum, and perhaps 90% of the questions I see in it could be answered by someone who had thoroughly read the entirety of the documentation. The problem is there isn't a person on earth who's thoroughly read EVERY piece of documentation. 
It would be nice if whenever somebody posted a question on the help forum, a bot could search through all of our documentation and attempt to provide a solution from what it finds. 243 | 244 | To do this, we can pull in the top five semantically similar documents to the user's query, throw them at ChatGPT as if the assistant had surfaced them, and then ask the assistant to provide a new message solving the query. If the user then continues the conversation with the bot by pinging the bots username, we will collect the entire conversational back-and-forth and feed it to ChatGPT, stripping out any previously surfaced results, and doing a new search for the five most relevant documents for the conversation. 245 | 246 | Think of it like a sliding window over the conversation, we don't want to muddle up the bot by providing links to previously surfaced docs because then it'll bias the search to resurface those. So instead we track the conversations and the sources separately. 247 | 248 | To make this work, we need to track the state of each of these forum questions, which we will call threads. So we have a thread table, which has the following fields: 249 | 250 | 251 | 252 | * created_at: When the thread was created 253 | * updated_at: When the thread last had activity, this will automatically get updated for us when we make changes 254 | * open: If the thread is open or not, aka if it is archived in Discord 255 | * solved: If the thread has been marked as solved 256 | * thread_id: The ID of the thread, it's actually just a stringified bigint from Discord's underlying ID system. 
257 | * title: The title the user gives the thread 258 | * collection: The collection the forum is associated with 259 | * thread_controller_id: The ID of the first message sent by the bot in the thread that we listen for votes on 260 | * author_id: The user who started the thread, who is also allowed to mark it as solved 261 | * solved_votes: The number of votes the first message has received for solved 262 | * failed_votes: The number of votes the first message has received for failed 263 | 264 | These threads will also have messages, which we can keep in our database to make it easy to rebuild the conversation. The way the bot has been designed, we want to use an event loop to ensure that the messages get sent from the bot, and we can't afford to wait inside the bot code or put everything in a long-running function since Discord limits the time you have until you need to return a response. So we will utilize transactional polling. 265 | 266 | The messages table has the following fields: 267 | 268 | 269 | 270 | * thread_id: The ID of the thread the message belongs to 271 | * message_id: The ID of the message, in this case a UUID, since we insert the message before Discord sends it 272 | * content: The text body of the message 273 | * from_bot: A boolean that is True if the message was sent from the bot, otherwise False 274 | * created_at: The time the message was created 275 | * updated_at: The last time the message was updated 276 | * first_message: A boolean that is True if this is the first message sent in the thread - the first message always gets a reply 277 | * mentions_bot: A boolean that is true if this message mentions the bot and should generate a reply 278 | * sources: A text field that will contain the list of source citations, that's nullable as it will be null at first 279 | * processed: A boolean that will be used to do a transactional poll and process the messages 280 | 281 | What do I mean by a transactional poll? 
In the Discord bot event loop that runs every 1 second, I will run the following GraphQL. (The resolvers have been so graciously provided via Hasura.)
320 | 321 | The last two tables I added on after the fact to move some configuration out of a `contstants.py` file that had been hard-coded to make it easier for people to run this project themselves. 322 | 323 | The configuration table: 324 | 325 | 326 | 327 | * guild_id: The ID of a guild the bot is in 328 | * logging_channel_id: The ID of the channel the bot should log to for that guild 329 | * mod_role_id: The ID of that guilds moderator role 330 | * banned_user_ids: A list of users who are banned from using the bot in this guild 331 | 332 | The guild_forums table: 333 | 334 | 335 | 336 | * guild_id: The ID of the guild the bot is in 337 | * forum_channel_id: The ID of the forum channel that the bot auto-responds in 338 | * forum_collection: The collection the forum channel searches 339 | 340 | 341 | # Part 4: The backend API 342 | 343 | Our backend API has **three endpoints**. 344 | 345 | They are: 346 | 347 | 348 | 349 | 1. `/upload_documents` – A endpoint to upload a document to Qdrant 350 | 2. `/new_message_event` – A endpoint that Hasura calls when it gets a new message 351 | 3. `/search`– An extra endpoint to provide a command to simply vector-search the sources without ChatGPT 352 | 353 | I'll now go over the first two endpoints, which happen to be the most important and share a bit of code. 354 | 355 | 356 | ## The `/upload_documents` endpoint 357 | 358 | Originally, the documents were a list that I uploaded as a batch, and while I could've used uuids in the [Qdrant vector database connector](https://github.com/hasura/ndc-qdrant) I have been working on, I made the ID's integers since I had to choose a default string or integer, and I happened to choose integer. However, I didn't like just randomly generating an integer like you would with uuid's since I'd likely end up with collisions. 
So the IDs ended up being chronological: you pass the endpoint a single document, and it will chunk it, upload as many points as it takes to fully embed the document, and return the ID the next point should use.
394 | 395 | ```python 396 | from models import * 397 | from utilities import * 398 | from constants import * 399 | from qdrant_client.http.models import Distance, VectorParams, PointStruct 400 | from qdrant_client.http.exceptions import UnexpectedResponse 401 | 402 | 403 | async def do_upload_documents(documents: UploadDocumentsRequest): 404 | collection = documents.collection 405 | qdrant_client = get_qdrant_client() 406 | openai_client = get_openai_client() 407 | try: 408 | await qdrant_client.get_collection(collection_name=collection) 409 | except UnexpectedResponse: 410 | await qdrant_client.create_collection( 411 | collection_name=collection, 412 | vectors_config=VectorParams(size=VECTOR_SIZE, distance=Distance.COSINE) 413 | ) 414 | doc = documents.document 415 | chunks = chunk_document(doc.body) 416 | offset = 0 417 | initial_id = doc.uid 418 | for c in chunks: 419 | embed = await openai_client.embeddings.create(input=c, model=OPENAI_EMBEDDING_MODEL) 420 | vector = embed.data[0].embedding 421 | parent = None 422 | if offset > 0: 423 | parent = initial_id + offset - 1 424 | await qdrant_client.upload_points(collection_name=collection, 425 | points=[PointStruct( 426 | id=initial_id + offset, 427 | vector=vector, 428 | payload={ 429 | "source": doc.source, 430 | "parent": parent, 431 | "tags": doc.tags, 432 | "url": doc.url, 433 | "body": c 434 | } 435 | )]) 436 | offset += 1 437 | return offset 438 | ``` 439 | 440 | 441 | I was able to use this API endpoint to upload the scraped collections from Part 1 into Qdrant at this point. I watched as my API endpoint sang for a while. 
442 | 443 | ```shell 444 | INFO: 127.0.0.1:51525 - "POST /upload_documents/ HTTP/1.1" 200 OK 445 | INFO: 127.0.0.1:51531 - "POST /upload_documents/ HTTP/1.1" 200 OK 446 | INFO: 127.0.0.1:51537 - "POST /upload_documents/ HTTP/1.1" 200 OK 447 | INFO: 127.0.0.1:51544 - "POST /upload_documents/ HTTP/1.1" 200 OK 448 | INFO: 127.0.0.1:51549 - "POST /upload_documents/ HTTP/1.1" 200 OK 449 | ``` 450 | 451 | After a while of the uploads running in the background, I had the collections in Qdrant. 🎉 452 | 453 | 454 | ![alt_text](images/image8.png "image_tooltip") 455 | 456 | 457 | 458 | ## The `/new_message_event` endpoint 459 | 460 | Next step was to build the endpoint that will do the AI magic, which ultimately is also relatively simple. I really enjoy building event-driven workflows using Hasura and Hasura events, because it makes it easy to break things down into bite-sized pieces and makes the code for even complex things read about as easily as you might read a book. Here's the code. 461 | 462 | ```python 463 | from models import * 464 | from utilities import * 465 | from constants import * 466 | from uuid import uuid4 467 | 468 | 469 | async def do_new_message_event(data: Event): 470 | qdrant_client = get_qdrant_client() 471 | openai_client = get_openai_client() 472 | message_data = data.event.data.new 473 | # If the message is "from the bot" i.e. this same endpoint inserts it, just return. This prevents a recursive event 474 | if message_data["from_bot"]: 475 | return 476 | else: 477 | # If this thread warrants a response, we should look up the threads' data. 
478 | if message_data["first_message"] or message_data["mentions_bot"]: 479 | thread_data = await execute_graphql(GRAPHQL_URL, 480 | GET_THREAD_GRAPHQL, 481 | {"thread_id": message_data["thread_id"]}, 482 | GRAPHQL_HEADERS) 483 | thread = thread_data.get("data", {}).get("thread_by_pk", None) 484 | if thread is None: 485 | return 486 | title = thread.get("title") 487 | collection = thread.get("collection") 488 | messages = [ 489 | {"role": "system", 490 | "content": SYSTEM_PROMPT 491 | } 492 | ] 493 | vector_content = "" 494 | for i, message in enumerate(thread.get("messages")): 495 | if i == 0: 496 | message["content"] = ROOT_QUERY_FORMAT.format(title=title, content=message["content"]) 497 | new_message = { 498 | "role": "assistant" if message_data["from_bot"] else "user", 499 | "content": message["content"] 500 | } 501 | messages.append(new_message) 502 | vector_content += new_message["content"] + "\n" 503 | 504 | # A shortcoming of this bot is that once the context extends past the embedding limit for the 505 | # conversation, the bot will keep resurfacing the same results but that's not so terrible for now. 
506 | embed = await openai_client.embeddings.create(input=vector_content, model=OPENAI_EMBEDDING_MODEL) 507 | vector = embed.data[0].embedding 508 | results = await qdrant_client.search(collection, 509 | query_vector=vector, 510 | limit=5, 511 | with_payload=["url", "body"]) 512 | # Construct the formatted inputs for the AI model 513 | formatted_text = "" 514 | search_links = "" 515 | for i, result in enumerate(results): 516 | formatted_text += RAG_FORMATTER.format(num=i + 1, 517 | url=result.payload["url"], 518 | score=result.score, 519 | body=result.payload["body"]) 520 | search_links += SEARCH_FORMATTER.format(num=i + 1, 521 | score=result.score, 522 | url=result.payload["url"]) 523 | result_text = ASSISTANT_RESULTS_WRAPPER.format(content=formatted_text) 524 | messages.append({ 525 | "role": "assistant", 526 | "content": result_text 527 | }) 528 | # Generate the results using OpenAI's API 529 | completion = await openai_client.chat.completions.create( 530 | model=OPENAI_MODEL, 531 | messages=messages 532 | ) 533 | result = completion.choices[0].message.content 534 | # Add a message to the thread. 535 | variables = { 536 | "object": { 537 | "thread_id": message_data["thread_id"], 538 | "message_id": str(uuid4()), 539 | "content": result, 540 | "from_bot": True, 541 | "first_message": False, 542 | "mentions_bot": False, 543 | "sources": search_links, 544 | "processed": False 545 | } 546 | } 547 | await execute_graphql(GRAPHQL_URL, 548 | INSERT_MESSAGE_GRAPHQL, 549 | variables, 550 | GRAPHQL_HEADERS) 551 | 552 | ``` 553 | 554 | 555 | I set up a Hasura event that would call my API endpoint whenever a new message was inserted into the message table, and of course forwarded the headers, which I pulled from the environment variables, which I stored the backend secret in. 556 | 557 | 558 | ![alt_text](images/image9.png "image_tooltip") 559 | 560 | ![alt_text](images/image10.png "image_tooltip") 561 | 562 | 563 | That's pretty much the entire backend API. 
There is also a search endpoint that I linked to a slash command to let users search the documentation, but if you're interested in that, just read the code. 564 | 565 | 566 | # Part 5: Building the bot 567 | 568 | There are five parts to this, as it took me five days to build this bot. What started out as the following flowchart I made on draw.io when this project was just an idea, was finally about to become reality. (Mostly, with some minor changes to the original flowchart.) 569 | 570 | 571 | ![alt_text](images/image11.png "image_tooltip") 572 | 573 | 574 | Building the bot was much easier than I remembered, which I'm unsure if that had to do with Discord's APIs improving or me simply upskilling over the years as the last time I'd built a Discord bot was circa 2018/2019-ish. I won't go over all the code, but I will detail some of the fun parts. 575 | 576 | The main things that mattered were listening to the `on_message` event, and the task loop. 577 | 578 | When a new message comes in, we need to create that message in the database to trigger the event in the backend API. This is handled by registering a listener that will fire on all messages and filtering to only handle the messages we actually care about. 579 | 580 | ```python 581 | @client.event 582 | async def on_message(message: Message): 583 | """ 584 | Each time a message is sent, this fires. 585 | 586 | :param message: The incoming message 587 | :return: The return from the linked handler function 588 | """ 589 | if message.author.id in BANNED: 590 | await message.channel.send(content=f"Silly <@{message.author.id}>, you've misbehaved and have been BANNED. 🔨") 591 | return 592 | return await event_on_message(client, message) 593 | ``` 594 | 595 | 596 | Once the message has been sent, it triggers the event on the backend, which will create a reply and the transactional poll to process messages is performed inside the task loop. 
597 | 598 | ```python 599 | @tasks.loop(seconds=1, count=None, reconnect=True) 600 | async def task_loop(): 601 | """ 602 | The main task loop. 603 | 604 | This is an event loop that runs every 1 second. It runs a transactional mutation to collect any unpublished messages 605 | 606 | If for some reason the task_loop fails, the message won't get sent. This is not a huge deal, the user can ask again. 607 | That shouldn't happen, however, doing this on a transactional poll like this is useful to ensure no more than 608 | once delivery, and aim for at least once. 609 | :return: The linked task loop 610 | """ 611 | return await execute_task_loop(client) 612 | ``` 613 | 614 | 615 | The task loop makes sure that the replies get handled and sent in the Discord channel. 616 | 617 | ```python 618 | from utilities import * 619 | from constants import * 620 | import discord 621 | 622 | 623 | async def execute_task_loop(client: discord.Client): 624 | """ 625 | This is the main task loop. 626 | :param client: The discord client. (essentially a singleton) 627 | :return: None 628 | """ 629 | # Get all tasks 630 | result = await execute_graphql(GRAPHQL_URL, 631 | PROCESS_MESSAGES_GRAPHQL, 632 | {}, 633 | GRAPHQL_HEADERS) 634 | # If False result skip as this was a failure. 
635 | if not result: 636 | return 637 | # Collect the list of tasks 638 | all_tasks = result["data"]["update_message"]["returning"] 639 | for task in all_tasks: 640 | thread_id = task["thread_id"] 641 | content = task["content"] 642 | sources = task["sources"] 643 | thread = task["thread"] 644 | thread_controller_id = thread["thread_controller_id"] 645 | thread_author_id = thread["author_id"] 646 | channel = client.get_channel(int(thread_id)) 647 | controller = await channel.fetch_message(int(thread_controller_id)) 648 | await send_long_message_in_embeds(channel=channel, 649 | title=RESPONSE_TITLE, 650 | message=content) 651 | await send_long_message_in_embeds(channel=channel, 652 | title=RESPONSE_SOURCES_TITLE, 653 | message=sources, 654 | color=discord.Color.green()) 655 | help_controller_message = HELP_CONTROLLER_MESSAGE.format(author=thread_author_id, 656 | bot=client.user.id, 657 | github=GITHUB_LINK) 658 | controller = await controller.edit(embed=discord.Embed(title=CONTROLLER_TITLE, 659 | description=help_controller_message, 660 | color=discord.Color.gold())) 661 | await controller.add_reaction(POSITIVE_EMOJI) 662 | await controller.add_reaction(NEGATIVE_EMOJI) 663 | ``` 664 | 665 | 666 | There were also a handful of slash commands I added, which running the `/commands` command in the Discord server will list out the different available commands and what they do. 667 | ![alt_text](images/image12.png "image_tooltip") 668 | 669 | 670 | 671 | # Part 6: Talking with the bot 672 | 673 | The last thing to do is to talk to the bot! 674 | 675 | 676 | ![alt_text](images/image13.gif "image_tooltip") 677 | 678 | 679 | 680 | # Some final thoughts 681 | 682 | I've just finished the first draft of this, and I'm not sure how much editing I'll do to it. Perhaps it'll be better to keep it a bit rough around the edges. I had a lot of fun building this, and hopefully, it will be of use to our community. The code is all OSS, so feel free to run this bot yourself. 
683 | 684 | If you liked this, and enjoyed the way I shoved it into a GitHub README, consider dropping a star on the repo, and maybe I can convince my employer to let me write all my blog posts in a README located adjacent to the code. 685 | 686 | Want more? [Read my previous piece, The Architect's Dilemma: Navigating the world of GraphQL](https://hasura.io/blog/the-architects-dilemma-navigating-the-world-of-graphql/). 687 | 688 | [Follow me on Twitter](https://twitter.com/TristenHarr) 689 | 690 | 691 | -------------------------------------------------------------------------------- /SETUP.md: -------------------------------------------------------------------------------- 1 | # Setting up the bot 2 | 3 | So, you want to run your very own Discord Bot? 4 | 5 | Let's talk setup. Lucky for you, this bot is pre-made in a way you can run it locally to experiment. 6 | 7 | Please follow these steps **before** you run the `docker-compose up` 8 | 9 | ### Prerequisites: 10 | 11 | 1. You need Docker installed and the Docker Daemon running in the background 12 | 2. You need an OpenAI API Key and Organization ID 13 | 3. You need a Discord Server you want to run the bot in! 14 | 15 | ### Step 1: Creating a Discord Bot 16 | 17 | The very first thing you need to do is create a Discord Application. 18 | 19 | 1. Sign in to the [Discord Developer Portal](https://discord.com/developers/applications) and go to the Applications 20 | page. 21 | 22 | 23 | 2. Click the "New Application" button. 24 | 25 | ![New Application](images/setup/new_application.jpeg) 26 | 27 | 3. Create a name for your bot, agree to the ToS and click Create! 28 | 29 | ![Name Application](images/setup/name_application.png) 30 | 31 | 4. On the Bot tab, turn off "Public Bot", and turn on all the options in "Privileged Gateway Intents" 32 | 33 | ![Bot Settings](images/setup/bot_settings.png) 34 | 35 | 5. 
Under the OAuth2 tab, go to the URL Generator, and for scopes select bot, and for bot permissions select 36 | Administrator. (You can narrow the scopes if you wish, I typically just give my bots Admin especially if I'm planning 37 | to continue developing on them) 38 | 39 | ![Bot Scopes](images/setup/set_scopes.png) 40 | 41 | 6. Copy the generated URL from the bottom of the page, and go into the Discord server you will run the bot in and paste 42 | it there. 43 | 44 | ![Generated URL](images/setup/generated_url.png) 45 | 46 | 7. Click on the URL inside the Discord server, and then follow the prompts to add the bot to the server. 47 | 48 | ![Add Bot To Server](images/setup/add_bot_to_server.png) 49 | 50 | 8. Go back to the Bot page in the Discord Developer Portal and click the Reset Token button. 51 | 52 | ![Reset Token](images/setup/reset_token.jpeg) 53 | 54 | 9. Copy the new Token and keep it somewhere safe, you'll need it in a minute! We'll refer to this as the "Discord Bot 55 | Token" later on. 56 | 57 | ![Token](images/setup/token.png) 58 | 59 | ### Step 2: Setting up the Discord Server for the Bot, and updating the database seed data. 60 | 61 | The Discord bot comes preloaded with a collection of Hasura's V3 documentation as an example in Qdrant. This collection 62 | is called the v3 collection. 63 | 64 | The way this bot currently works is that you need to seed the database with the configuration. 65 | 66 | In order to make this process simple, I wrote a script called seed.py, and provide a template JSON file so that you can 67 | configure the seed data. 
68 | 69 | This is the seed data that you will edit located at `seed.json`: 70 | 71 | ```json 72 | { 73 | "COLLECTION_ENUM": [ 74 | { 75 | "value": "v3" 76 | } 77 | ], 78 | "configuration": [ 79 | { 80 | "guild_id": YOUR_GUILD_ID, 81 | "logging_channel_id": YOUR_LOGGING_CHANNEL_ID, 82 | "mod_role_id": YOUR_MOD_ROLE_ID, 83 | "banned_user_ids": [] 84 | } 85 | ], 86 | "guild_forums": [ 87 | { 88 | "guild_id": YOUR_GUILD_ID, 89 | "forum_channel_id": YOUR_V3_FORUM_CHANNEL_ID, 90 | "forum_collection": "v3" 91 | } 92 | ] 93 | } 94 | ``` 95 | 96 | In Discord, you will need 1 Forum channel, 1 Text channel, and 1 User Role that will serve as a bot moderator role. You 97 | will also need the Guild ID. 98 | 99 | 1. **To Collect the Guild ID** 100 | 101 | Right-click on the Server name, and click "Copy Server ID". My Guild ID for example is: `1204239405011832883`. In 102 | the `seed.json` file, fill in the above template replacing `YOUR_GUILD_ID` with the Guild Id. 103 | 104 | ![Server ID](images/setup/server_id.png) 105 | 106 | 2. **To get the ID of the Logging Channel** 107 | 108 | Pick a text-channel you want the bot to log things to. Right-click on the channel, and click "Copy Channel ID". My 109 | channel ID for example is `1204239405456297995`. In the `seed.json` file, fill in the above template 110 | replacing `YOUR_LOGGING_CHANNEL_ID` with the Channel ID. 111 | 112 | ![Logging channel](images/setup/logging_channel.png) 113 | 114 | 3. **To get the ID of the Forum Channel** 115 | 116 | Pick a forum-channel the bot will link to the v3 collection. You can add more forum-channels and collections later. 117 | Right-click on the forum and click "Copy Channel ID". My forum channel ID for example is: `1204246613145419806`. In 118 | the `seed.json` file, fill in the above template replacing `YOUR_V3_FORUM_CHANNEL_ID` with the Channel ID. 119 | 120 | ![Forum Channel](images/setup/forum_channel.png) 121 | 122 | 4. 
**To get the Moderator Role ID** 123 | 124 | Right-click on the server, and go to server settings, then under Roles, right-click a role that you want the bot to 125 | recognize as a moderator, and click "Copy Role ID". My Role ID for example is `1209640032747528202`. In 126 | the `seed.json` file, fill in the above template replacing `YOUR_MOD_ROLE_ID` with the Role ID. 127 | 128 | ![Server Settings](images/setup/server_settings.png) 129 | 130 | ![Server Moderator Role](images/setup/role_id.png) 131 | 132 | My `seed.json` which is what ships with the project looks like after being filled out: 133 | 134 | ```json 135 | { 136 | "COLLECTION_ENUM": [ 137 | { 138 | "value": "v3" 139 | } 140 | ], 141 | "configuration": [ 142 | { 143 | "guild_id": 1204239405011832883, 144 | "logging_channel_id": 1204239405456297995, 145 | "mod_role_id": 1209640032747528202, 146 | "banned_user_ids": [] 147 | } 148 | ], 149 | "guild_forums": [ 150 | { 151 | "guild_id": 1204239405011832883, 152 | "forum_channel_id": 1204246613145419806, 153 | "forum_collection": "v3" 154 | } 155 | ] 156 | } 157 | ``` 158 | 159 | Once you have finished filling out the JSON template, you can run the seed.py file. This script will overwrite the file 160 | located at: `hasura_discord_postgres/data/seed.sql` which is used by the Postgres instance to seed the database. 161 | 162 | In your terminal run: 163 | 164 | ```shell 165 | python seed.py 166 | ``` 167 | 168 | (You could also directly edit the seed.sql file if you wanted, they both achieve the same results. I just find it easier 169 | to work with JSON than writing SQL COPY bulk insert ops by hand) 170 | 171 | ### Step 3: Configuring the `.env` file 172 | 173 | Create a .env file, making a copy of the `.env.example` file provided. 174 | 175 | ```shell 176 | cp .env.example .env 177 | ``` 178 | 179 | Update the last 4 variables in the .env file. 
180 | 181 | Replace `GUILD_ID` with the Guild ID from step 2.1 182 | 183 | Replace `DISCORD_CLIENT_SECRET` with the "Discord Bot Token" from step 1.9 184 | 185 | Replace `OPENAI_API_KEY` with an OpenAI API Key. 186 | 187 | Replace `OPENAI_ORGANIZATION` with the OpenAI Organization ID. 188 | 189 | ``` 190 | GUILD_ID= 191 | DISCORD_CLIENT_SECRET= 192 | OPENAI_API_KEY= 193 | OPENAI_ORGANIZATION= 194 | ``` 195 | 196 | Once you've completed filling out those 4 variables in the `.env` file, you are ready to run the bot! 197 | 198 | ### Step 4: Running the bot 199 | 200 | Run ```docker-compose up``` 201 | 202 | It might take a minute or two for everything to start up. You should be able to see the containers inside Docker. 203 | If things have started successfully, the logging channel should receive a message from your bot. 204 | 205 | ![Bot Message](images/setup/bot_message.png) 206 | 207 | A note about the `docker-compose.yaml`: The hasura_discord_bot service passes an environment variable 208 | called `SYNC_ON_STARTUP` which is set to 1. It is not recommended that the bot sync its commands every startup, however 209 | you must sync the `/update` command at some point in order to perform future syncs. To overcome this, when the container 210 | for the hasura_discord_bot service is started with environment variable SYNC_ON_STARTUP set to 1, the bot will sync on 211 | startup. After the bot has synced, and you have access to the `/update` command in the Discord server, you can remove 212 | this environment variable to follow recommended practices and avoid rate-limits. 213 | 214 | ### Testing the bot 215 | 216 | The first test you can send to the bot, is a simple test. Use the `/hello` command. You should be able to see the 217 | commands via a dropdown. 218 | 219 | ![Hello Command](images/setup/hello.png) 220 | 221 | The bot should reply with `Hello World!` 222 | 223 | ![Hello World](images/setup/hello_world.png) 224 | 225 | After that, you can use the `/commands` command to learn more about how the bot works. 
226 | 227 | ![Commands](images/setup/commands.png) 228 | 229 | One important test, is to see if the search works. You should be able to search the `v3` collection. 230 | 231 | ![Search](images/setup/search.png) 232 | 233 | This should result in something like this. 234 | 235 | ![img.png](images/setup/search_results.png) 236 | 237 | For some reason, this search is occasionally flaky when the whole thing starts up for the first time. I think it 238 | might've been because the underlying Qdrant client initially had a timeout of 5 seconds. (Not sure why it would exceed 239 | the timeout though?) I **think** increasing this to 60 seconds fixed it, but if you spot an Internal Service Error, 240 | please open an issue and let me know! 241 | 242 | Next, you can go into the forum channel, and create a post. 243 | 244 | ![Test Post](images/setup/test_post.png) 245 | 246 | The bot should respond back to you, and you can converse with it. 247 | 248 | ![Bot Talks](images/setup/bot_talk.png) 249 | 250 | You can go to the Qdrant dashboard by navigating in a browser to: `http://localhost:6333/dashboard` 251 | 252 | ![Qdrant Dashboard](images/setup/qdrant_dashboard.png) 253 | 254 | You can go to the Hasura Console by navigating in a browser to: `http://localhost:8080/console` and entering the default 255 | value: `secret`. (This is set in the `.env` file.) 256 | 257 | ![Hasura Console](images/setup/hasura_console.png) 258 | 259 | You can run this query. 
260 | 261 | ```graphql 262 | query Q { 263 | configuration { 264 | guild_id 265 | logging_channel_id 266 | mod_role_id 267 | banned_user_ids 268 | } 269 | guild_forums { 270 | guild_id 271 | forum_channel_id 272 | forum_collection 273 | } 274 | thread { 275 | thread_id 276 | title 277 | open 278 | solved 279 | created_at 280 | collection 281 | author_id 282 | messages { 283 | content 284 | from_bot 285 | sources 286 | } 287 | } 288 | } 289 | ``` 290 | 291 | If you want to watch the bot working in action, run this subscription, then watch as you create a new post: 292 | 293 | ```graphql 294 | subscription Threads { 295 | thread(limit: 10, order_by: {created_at: desc}) { 296 | thread_id 297 | title 298 | open 299 | solved 300 | created_at 301 | collection 302 | author_id 303 | messages { 304 | content 305 | from_bot 306 | sources 307 | processed 308 | } 309 | } 310 | } 311 | ``` 312 | 313 | You can also see the API docs generated for the FastAPI backend in your choice of style: 314 | 315 | Swagger: `http://localhost:8100/docs` 316 | 317 | ![Swagger](images/setup/swagger.png) 318 | 319 | Redoc: `http://localhost:8100/redoc` 320 | 321 | ![Redoc](images/setup/redoc.png) 322 | 323 | Hooray! You finished the Setup! 324 | 325 | ### Adding document collections: 326 | 327 | There are some useful scripts lying around... 328 | 329 | See: `hasura_discord_backend/scripts/scrape_docusaurus.py` for the script I used to scrape documentation. 330 | 331 | The requirements file for this can be found at: `hasura_discord_backend/scripts/scrape_docusaurus_requirements.txt` 332 | 333 | You can install these via: 334 | 335 | `pip3 install -r hasura_discord_backend/scripts/scrape_docusaurus_requirements.txt` 336 | 337 | Also in the `hasura_discord_backend` directory you will find some upload scripts, these upload the scraped data from a 338 | JSON file to the Qdrant database. See the API code for more details. 
-------------------------------------------------------------------------------- /commands.txt: -------------------------------------------------------------------------------- 1 | 1 ls 2 | 2 cd /var/www/html/.well-known/acme-challenge 3 | 3 ls 4 | 4 sudo apt-get update 5 | 5 docker 6 | 6 cat /etc/os-release 7 | 7 uname -r 8 | 8 lscpo 9 | 9 lscpu 10 | 10 df -h 11 | 11 sudo apt install docker.io 12 | 12 sudo systemctl enable --now docker 13 | 13 sudo usermod -aG docker $user 14 | 14 sudo usermod -aG docker $USER 15 | 15 newgrp docker 16 | 16 docker --version 17 | 17 docker run hello-world 18 | 18 sudo ufw allow 22 19 | 19 docker pull qdrant/qdrant 20 | 20 docker run -p 6333:6333 -p 6334:6334 -v $(pwd)/qdrant_storage:/qdrant/storage:z qdrant/qdrant 21 | 21 ls 22 | 22 sudo apt-get install openssl 23 | 23 openssl req -x509 -newkey rsa:4096 -keyout key.pem -out cert.pem -days 365 -nodes -subj "/CN=35.184.153.33" 24 | 24 docker run -d -p 6333:6333 -p 6334:6334 -v /path/to/cert.pem:/etc/qdrant/cert.pem -v /path/to/key.pem:/etc/qdrant/key.pem -e QDRANT__SERVICE__ENABLE_TLS=true -e QDRANT__TLS__CERT=/etc/qdrant/cert.pem -e QDRANT__TLS__KEY=/etc/qdrant/key.pem qdrant/qdrant 25 | 25 docker ls 26 | 26 docker --help 27 | 27 docker images 28 | 28 docker ps 29 | 29 lsof -i 8100 30 | 30 lsof -i 6333 31 | 31 lsof -i :6333 32 | 32 docker ps 33 | 33 docker ps -a 34 | 34 docker pull qdrant/qdrant 35 | 35 docker run -p 6333:6333 -p 6334:6334 -v $(pwd)/qdrant_storage:/qdrant/storage:z qdrant/qdrant 36 | 36 ls 37 | 37 mkdir qdrant 38 | 38 ls 39 | 39 cd qdrant 40 | 40 touch config.yaml 41 | 41 vi config.yaml 42 | 42 ls 43 | 43 cd .. 44 | 44 ls 45 | 45 cd qdrant 46 | 46 vi config.yaml 47 | 47 ls 48 | 48 cd .. 
49 | 49 ls 50 | 50 touch docker-compose.yaml 51 | 51 docker-compose up 52 | 52 ls 53 | 53 vi docker-compose.yaml 54 | 54 sudo curl -L https://github.com/docker/compose/releases/download/1.25.3/docker-compose-`uname -s`-`uname -m` -o /usr/local/bin/docker-compose 55 | 55 sudo chmod +x /usr/local/bin/docker-compose 56 | 56 docker-compose --version 57 | 57 docker compose up 58 | 58 docker-compose up 59 | 59 ls 60 | 60 vi docker-compose.yaml 61 | 61 docker-compose up 62 | 62 vi docker-compose.yaml 63 | 63 docker-compose up 64 | 64 vi docker-compose.yaml 65 | 65 docker-compose up 66 | 66 ls 67 | 67 vi docker-compose.yaml 68 | 68 docker-compose up 69 | 69 docker-compose up --build 70 | 70 clear 71 | 71 docker-compose up 72 | 72 vi docker-compose.yaml 73 | 73 docker-compose up 74 | 74 ls 75 | 75 pwd 76 | 76 cd qdrant 77 | 77 pwd 78 | 78 cd .. 79 | 79 ls 80 | 80 pwd 81 | 81 vi docker-compose.yaml 82 | 82 docker-compose up 83 | 83 docker compose down 84 | 84 ls 85 | 85 docker compose down 86 | 86 docker-compose down 87 | 87 docker-compose up 88 | 88 ls 89 | 89 cd qdrant 90 | 90 ls 91 | 91 cat config.yaml 92 | 92 ls 93 | 93 cd .. 94 | 94 vi docker-compose.yaml 95 | 95 docker-compose up 96 | 96 docker compose down 97 | 97 docker-compose up 98 | 98 vi docker-compose.yaml 99 | 99 docker-compose up 100 | 100 vi docker-compose.yaml 101 | 101 docker-compose up 102 | 102 ls 103 | 103 cd qdrant 104 | 104 cd .. 105 | 105 ls 106 | 106 cd qdrant 107 | 107 vi config.yaml 108 | 108 docker-compose up 109 | 109 ls 110 | 110 cd .. 111 | 111 docker-compose up 112 | 112 ls 113 | 113 vi docker-compose.yaml 114 | 114 docker-compose up 115 | 115 ls 116 | 116 cat docker-compose.yaml 117 | 117 cd qdrant 118 | 118 cat config.yaml 119 | 119 ls 120 | 120 cd .. 
121 | 121 ls 122 | 122 vi docker-compose.yaml 123 | 123 docker-compose down 124 | 124 docker-compose up 125 | 125 ls 126 | 126 vi docker-compose.yaml 127 | 127 docker-compose up 128 | 128 ls 129 | 129 cd qdrant 130 | 130 ls 131 | 131 vi config.yaml 132 | 132 openssl rand -hex 8 133 | 133 openssl rand -hex 16 134 | 134 openssl rand -hex 32 135 | 135 ls 136 | 136 vi config.yaml 137 | 137 docker-compose up 138 | 138 ls 139 | 139 cd .. 140 | 140 ls 141 | 141 docker-compose up 142 | 142 ls 143 | 143 cat cert.pem 144 | 144 docker-compose up 145 | 145 ls 146 | 146 cat key.pem 147 | 147 docker-compose up 148 | 148 ls 149 | 149 mkdir old_tls 150 | 150 mv cert.pem old_tls/cert.pem 151 | 151 ls 152 | 152 mv key.pem old_tls/key.pem 153 | 153 ls 154 | 154 sudo apt-get install certbot 155 | 155 sudo certbot certonly --manual --preferred-challenges http -d 35.184.153.33 156 | 156 sudo certbot certonly --manual -d hasura-bots.com -d www.hasura-bots.com 157 | 157 docker-compose up 158 | 158 lsd 159 | 159 ls 160 | 160 rm cert.pem 161 | 161 ls 162 | 162 rm -f cert.pem 163 | 163 del cert.pem 164 | 164 delete cert.pem 165 | 165 ls 166 | 166 rm cert.pem 167 | 167 rmdir cert.pem 168 | 168 ls 169 | 169 rmdir key.pem 170 | 170 ls 171 | 171 cd old_tls 172 | 172 ls 173 | 173 cd .. 174 | 174 ls 175 | 175 sudo certbot certonly --manual -d hasura-bots.com -d www.hasura-bots.com 176 | 176 ls 177 | 177 cat docker-compose.yaml 178 | 178 ls 179 | 179 vi docker-compose.yaml 180 | 180 ls 181 | 181 mkdir nging 182 | 182 rmdir nging 183 | 183 ls 184 | 184 mkdir nginx 185 | 185 ls 186 | 186 cd nxinx 187 | 187 cd nginx 188 | 188 vi default.conf 189 | 189 cd .. 
190 | 190 ls 191 | 191 docker-compose up -d nginx 192 | 192 docker-compose run --rm certbot certonly --webroot --webroot-path=/var/www/certbot --email tristen.harr@hasura.io --agree-tos --no-eff-email -d hasura-bots.com 193 | 193 docker-compose up -d nginx 194 | 194 docker-compose run --rm certbot certonly --webroot --webroot-path=/var/www/certbot --email tristen.harr@hasura.io --agree-tos --no-eff-email -d hasura-bots.com 195 | 195 ls 196 | 196 cd certbot 197 | 197 ls 198 | 198 cd www 199 | 199 ls 200 | 200 cd .. 201 | 201 ls 202 | 202 cd conf 203 | 203 ls 204 | 204 cd .. 205 | 205 ls 206 | 206 cd .. 207 | 207 ls 208 | 208 cd .. 209 | 209 ls 210 | 210 cd .. 211 | 211 ls 212 | 212 cd www 213 | 213 cd var 214 | 214 ls 215 | 215 cd www 216 | 216 ls 217 | 217 cd .. 218 | 218 ls 219 | 219 cd user 220 | 220 ls 221 | 221 cd home 222 | 222 ls 223 | 223 cd tristen_harr 224 | 224 ls 225 | 225 cat docker-compose.yaml 226 | 226 ls 227 | 227 cd nginx 228 | 228 lks 229 | 229 ls 230 | 230 cat default.conf 231 | 231 cd .. 232 | 232 docker compose up 233 | 233 docker-compose up 234 | 234 ls 235 | 235 cd certbot 236 | 236 ls 237 | 237 cd www 238 | 238 ls 239 | 239 cd .. 240 | 240 ls 241 | 241 cd conf 242 | 242 ls 243 | 243 cd renewal-hooks 244 | 244 ls 245 | 245 cd .. 246 | 246 ls 247 | 247 cd .. 248 | 248 ls 249 | 249 cd /etc 250 | 250 ls 251 | 251 cd letsencrypt 252 | 252 ls 253 | 253 cd keys 254 | 254 ls 255 | 255 cd keys 256 | 256 ls 257 | 257 sudo cd keys 258 | 258 ls 259 | 259 ls keys 260 | 260 sudo ls keys 261 | 261 ls 262 | 262 cd .. 
263 | 263 ls 264 | 264 cd home 265 | 265 ls 266 | 266 cd tristen_harr 267 | 267 ls 268 | 268 docker-compose run --rm certbot certonly --webroot --webroot-path=/var/www/certbot --email tristen.harr@hasura.io --agree-tos --no-eff-email -d hasura-bots.com 269 | 269 docker-compose run --rm certbot certificates 270 | 270 docker-compose run --rm certbot certonly --webroot --webroot-path=/var/www/certbot --email tristen.harr@hasura.io --agree-tos --no-eff-email -d hasura-bots.com --force-renewal 271 | 271 cat docker-compose.yaml 272 | 272 ls ./certbot/conf/live 273 | 273 docker-compose down 274 | 274 sudo rm -rf ./certbot/conf/* 275 | 275 docker-compose up -d 276 | 276 docker-compose run --rm certbot certonly --webroot --webroot-path=/var/www/certbot --email tristen.harr@hasura.io --agree-tos --no-eff-email -d hasura-bots.com --force-renewal 277 | 277 docker-compose up -d 278 | 278 docker compose down 279 | 279 docker-compose down 280 | 280 docker-compose up 281 | 281 ls 282 | 282 rmdir certbot 283 | 283 sudo certbot certonly --standalone -d hasura-bots.com --agree-tos -m tristen.harr@hasura.io --preferred-challenges http 284 | 284 docker-compose up 285 | 285 ls 286 | 286 cat docker-compose.yaml 287 | 287 vi docker-compose.yaml 288 | 288 docker-compose up 289 | 289 cat docker-compose.yaml 290 | 290 ls 291 | 291 cd nginx 292 | 292 ls 293 | 293 cat default.conf 294 | 294 cd .. 295 | 295 ls 296 | 296 vi docker-compose.yaml 297 | 297 docker compose up nginx --build 298 | 298 docker-compose up nginx 299 | 299 ls 300 | 300 cat docker-compose.yaml 301 | 301 ls 302 | 302 cd certbot 303 | 303 ls 304 | 304 cd www 305 | 305 ls 306 | 306 cd .. 307 | 307 ls 308 | 308 cd /etc/certbot 309 | 309 cd /etc 310 | 310 ls 311 | 311 cd .. 
312 | 312 ls 313 | 313 cd home 314 | 314 ls 315 | 315 cd tristen_harr 316 | 316 ls 317 | 317 cat docker-compose.yaml 318 | 318 /etc/letsencrypt/live/hasura-bots.com/fullchain.pem 319 | 319 cat /etc/letsencrypt/live/hasura-bots.com/fullchain.pem 320 | 320 sudo cat /etc/letsencrypt/live/hasura-bots.com/fullchain.pem 321 | 321 sudo vi /etc/letsencrypt/live/hasura-bots.com/fullchain.pem 322 | 322 sudo vi /etc/letsencrypt/live/hasura-bots.com/privkey.pem 323 | 323 sudo cd /etc/letsencrypt/live/hasura-bots.com/ 324 | 324 ls 325 | 325 sudo su 326 | 326 ls 327 | 327 history > history.txt -------------------------------------------------------------------------------- /docker-compose.yaml: -------------------------------------------------------------------------------- 1 | version: "3.6" 2 | services: 3 | postgres: 4 | image: postgres:15 5 | restart: always 6 | volumes: 7 | - db_data:/var/lib/postgresql/data 8 | - ./hasura_discord_postgres/data:/docker-entrypoint-initdb.d 9 | environment: 10 | POSTGRES_PASSWORD: ${POSTGRES_PASSWORD} 11 | graphql-engine: 12 | image: hasura/graphql-engine:v2.37.0.cli-migrations-v3 13 | ports: 14 | - "8080:8080" 15 | restart: always 16 | volumes: 17 | - ./hasura_discord_postgres/postgres/metadata:/hasura-metadata 18 | environment: 19 | ## postgres database to store Hasura metadata 20 | HASURA_GRAPHQL_METADATA_DATABASE_URL: ${POSTGRES_URL} 21 | ## this env var can be used to add the above postgres database to Hasura as a data source. this can be removed/updated based on your needs 22 | PG_DATABASE_URL: ${POSTGRES_URL} 23 | HASURA_GRAPHQL_DATABASE_URL: ${POSTGRES_URL} 24 | ## enable the console served by server 25 | HASURA_GRAPHQL_ENABLE_CONSOLE: "true" # set to "false" to disable console 26 | ## enable debugging mode. 
It is recommended to disable this in production 27 | HASURA_GRAPHQL_DEV_MODE: "true" 28 | HASURA_GRAPHQL_ENABLED_LOG_TYPES: startup, http-log, webhook-log, websocket-log, query-log 29 | BACKEND_URL: ${BACKEND_URL} 30 | BACKEND_SECRET: ${BACKEND_API_KEY} 31 | ## uncomment next line to run console offline (i.e load console assets from server instead of CDN) 32 | # HASURA_GRAPHQL_CONSOLE_ASSETS_DIR: /srv/console-assets 33 | ## uncomment next line to set an admin secret 34 | HASURA_GRAPHQL_ADMIN_SECRET: ${HASURA_GRAPHQL_ADMIN_SECRET} 35 | HASURA_GRAPHQL_METADATA_DEFAULTS: '{"backend_configs":{"dataconnector":{"athena":{"uri":"http://data-connector-agent:8081/api/v1/athena"},"mariadb":{"uri":"http://data-connector-agent:8081/api/v1/mariadb"},"mysql8":{"uri":"http://data-connector-agent:8081/api/v1/mysql"},"oracle":{"uri":"http://data-connector-agent:8081/api/v1/oracle"},"snowflake":{"uri":"http://data-connector-agent:8081/api/v1/snowflake"}}}}' 36 | depends_on: 37 | data-connector-agent: 38 | condition: service_healthy 39 | data-connector-agent: 40 | image: hasura/graphql-data-connector:v2.37.0 41 | restart: always 42 | ports: 43 | - 8081:8081 44 | environment: 45 | QUARKUS_LOG_LEVEL: ERROR # FATAL, ERROR, WARN, INFO, DEBUG, TRACE 46 | ## https://quarkus.io/guides/opentelemetry#configuration-reference 47 | QUARKUS_OPENTELEMETRY_ENABLED: "false" 48 | ## QUARKUS_OPENTELEMETRY_TRACER_EXPORTER_OTLP_ENDPOINT: http://jaeger:4317 49 | healthcheck: 50 | test: [ "CMD", "curl", "-f", "http://localhost:8081/api/v1/athena/health" ] 51 | interval: 5s 52 | timeout: 10s 53 | retries: 5 54 | start_period: 5s 55 | qdrant: 56 | image: qdrant/qdrant 57 | ports: 58 | - "6333:6333" # Change Qdrant to listen on a different port internally to avoid conflicts with Nginx 59 | restart: always 60 | volumes: 61 | - qdrant_data:/var/lib/qdrant 62 | - ./hasura_discord_qdrant/config.yaml:/qdrant/config/config.yaml 63 | - ./hasura_discord_qdrant/snapshots:/snapshots # Mount the directory containing 
snapshots 64 | command: > 65 | /bin/sh -c " 66 | ./qdrant --snapshot /snapshots/v3.snapshot:v3 --force-snapshot 67 | " 68 | hasura_discord_bot: 69 | build: 70 | context: ./hasura_discord_bot 71 | dockerfile: Dockerfile 72 | environment: 73 | CLIENT_SECRET: ${DISCORD_CLIENT_SECRET} 74 | SEARCH_ENDPOINT_URL: ${SEARCH_ENDPOINT_URL} 75 | SEARCH_ENDPOINT_API_KEY_HEADER: ${BACKEND_API_KEY_HEADER_NAME} 76 | SEARCH_ENDPOINT_API_KEY: ${BACKEND_API_KEY} 77 | GRAPHQL_URL: ${HASURA_GRAPHQL_URL} 78 | GRAPHQL_ADMIN_SECRET: ${HASURA_GRAPHQL_ADMIN_SECRET} 79 | GUILD_ID: ${GUILD_ID} 80 | SYNC_ON_STARTUP: 1 81 | depends_on: 82 | graphql-engine: 83 | condition: service_healthy 84 | 85 | hasura_discord_backend: 86 | build: 87 | context: ./hasura_discord_backend 88 | dockerfile: Dockerfile 89 | ports: 90 | - "8100:8100" 91 | environment: 92 | API_KEY_HEADER_NAME: ${BACKEND_API_KEY_HEADER_NAME} 93 | API_KEY: ${BACKEND_API_KEY} 94 | QDRANT_URL: ${QDRANT_URL} 95 | QDRANT_API_KEY: ${QDRANT_API_KEY} 96 | OPENAI_API_KEY: ${OPENAI_API_KEY} 97 | OPENAI_ORGANIZATION: ${OPENAI_ORGANIZATION} 98 | OPENAI_MODEL: ${OPENAI_MODEL} 99 | OPENAI_EMBEDDING_MODEL: ${OPENAI_EMBEDDING_MODEL} 100 | VECTOR_SIZE: ${VECTOR_SIZE} 101 | GRAPHQL_URL: ${HASURA_GRAPHQL_URL} 102 | GRAPHQL_ADMIN_SECRET: ${HASURA_GRAPHQL_ADMIN_SECRET} 103 | OPENAI_CHAT_TEMPERATURE: 0.2 104 | PORT: 8100 105 | volumes: 106 | db_data: 107 | qdrant_data: 108 | -------------------------------------------------------------------------------- /hasura_discord_postgres/data/create.sql: -------------------------------------------------------------------------------- 1 | SET check_function_bodies = false; 2 | CREATE FUNCTION public.set_current_timestamp_updated_at() RETURNS trigger 3 | LANGUAGE plpgsql 4 | AS $$ 5 | DECLARE 6 | _new record; 7 | BEGIN 8 | _new := NEW; 9 | _new."updated_at" = NOW(); 10 | RETURN _new; 11 | END; 12 | $$; 13 | CREATE TABLE public."COLLECTION_ENUM" ( 14 | value text NOT NULL, 15 | comment text 16 | ); 17 | CREATE TABLE 
public.configuration ( 18 | guild_id bigint NOT NULL, 19 | logging_channel_id bigint NOT NULL, 20 | mod_role_id bigint NOT NULL, 21 | banned_user_ids bigint[] NOT NULL 22 | ); 23 | CREATE TABLE public.guild_forums ( 24 | guild_id bigint NOT NULL, 25 | forum_channel_id bigint NOT NULL, 26 | forum_collection text NOT NULL 27 | ); 28 | CREATE TABLE public.message ( 29 | thread_id text NOT NULL, 30 | message_id text NOT NULL, 31 | content text NOT NULL, 32 | from_bot boolean NOT NULL, 33 | created_at timestamp with time zone DEFAULT now() NOT NULL, 34 | updated_at timestamp with time zone DEFAULT now() NOT NULL, 35 | first_message boolean NOT NULL, 36 | mentions_bot boolean NOT NULL, 37 | sources text, 38 | processed boolean DEFAULT false NOT NULL 39 | ); 40 | CREATE TABLE public.thread ( 41 | created_at timestamp with time zone DEFAULT now() NOT NULL, 42 | updated_at timestamp with time zone DEFAULT now() NOT NULL, 43 | open boolean DEFAULT true NOT NULL, 44 | solved boolean DEFAULT false NOT NULL, 45 | thread_id text NOT NULL, 46 | title text, 47 | collection text NOT NULL, 48 | thread_controller_id text NOT NULL, 49 | author_id text NOT NULL, 50 | solved_votes integer DEFAULT 0 NOT NULL, 51 | failed_votes integer DEFAULT 0 NOT NULL 52 | ); 53 | ALTER TABLE ONLY public."COLLECTION_ENUM" 54 | ADD CONSTRAINT "COLLECTION_ENUM_pkey" PRIMARY KEY (value); 55 | ALTER TABLE ONLY public.configuration 56 | ADD CONSTRAINT configuration_pkey PRIMARY KEY (guild_id); 57 | ALTER TABLE ONLY public.guild_forums 58 | ADD CONSTRAINT guild_forums_pkey PRIMARY KEY (guild_id, forum_channel_id); 59 | ALTER TABLE ONLY public.message 60 | ADD CONSTRAINT message_pkey PRIMARY KEY (thread_id, message_id); 61 | ALTER TABLE ONLY public.thread 62 | ADD CONSTRAINT thread_pkey PRIMARY KEY (thread_id); 63 | ALTER TABLE ONLY public.thread 64 | ADD CONSTRAINT thread_thread_controller_id_key UNIQUE (thread_controller_id); 65 | CREATE TRIGGER set_public_message_updated_at BEFORE UPDATE ON public.message 
FOR EACH ROW EXECUTE FUNCTION public.set_current_timestamp_updated_at(); 66 | COMMENT ON TRIGGER set_public_message_updated_at ON public.message IS 'trigger to set value of column "updated_at" to current timestamp on row update'; 67 | CREATE TRIGGER set_public_thread_updated_at BEFORE UPDATE ON public.thread FOR EACH ROW EXECUTE FUNCTION public.set_current_timestamp_updated_at(); 68 | COMMENT ON TRIGGER set_public_thread_updated_at ON public.thread IS 'trigger to set value of column "updated_at" to current timestamp on row update'; 69 | ALTER TABLE ONLY public.guild_forums 70 | ADD CONSTRAINT guild_forums_forum_collection_fkey FOREIGN KEY (forum_collection) REFERENCES public."COLLECTION_ENUM"(value) ON UPDATE CASCADE ON DELETE CASCADE; 71 | ALTER TABLE ONLY public.guild_forums 72 | ADD CONSTRAINT guild_forums_guild_id_fkey FOREIGN KEY (guild_id) REFERENCES public.configuration(guild_id) ON UPDATE CASCADE ON DELETE CASCADE; 73 | ALTER TABLE ONLY public.message 74 | ADD CONSTRAINT message_thread_id_fkey FOREIGN KEY (thread_id) REFERENCES public.thread(thread_id) ON UPDATE CASCADE ON DELETE CASCADE; 75 | ALTER TABLE ONLY public.thread 76 | ADD CONSTRAINT thread_collection_fkey FOREIGN KEY (collection) REFERENCES public."COLLECTION_ENUM"(value) ON UPDATE CASCADE ON DELETE CASCADE; -------------------------------------------------------------------------------- /hasura_discord_postgres/data/seed.sql: -------------------------------------------------------------------------------- 1 | COPY public."COLLECTION_ENUM" (value, comment) FROM stdin; 2 | v3 \N 3 | \. 4 | COPY public.configuration (guild_id, logging_channel_id, mod_role_id, banned_user_ids) FROM stdin; 5 | 1204239405011832883 1204239405456297995 1209640032747528202 {} 6 | \. 7 | COPY public.guild_forums (guild_id, forum_channel_id, forum_collection) FROM stdin; 8 | 1204239405011832883 1204246613145419806 v3 9 | \. 
10 | -------------------------------------------------------------------------------- /hasura_discord_postgres/postgres/config.yaml: -------------------------------------------------------------------------------- 1 | version: 3 2 | endpoint: http://localhost:8080 3 | metadata_directory: metadata 4 | actions: 5 | kind: synchronous 6 | handler_webhook_baseurl: http://localhost:3000 7 | -------------------------------------------------------------------------------- /hasura_discord_postgres/postgres/metadata/actions.graphql: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hasura/hasura-discord-docs-bot/ae2083c4d2c04f29af47f980a9dcf579c172c772/hasura_discord_postgres/postgres/metadata/actions.graphql -------------------------------------------------------------------------------- /hasura_discord_postgres/postgres/metadata/actions.yaml: -------------------------------------------------------------------------------- 1 | actions: [] 2 | custom_types: 3 | enums: [] 4 | input_objects: [] 5 | objects: [] 6 | scalars: [] 7 | -------------------------------------------------------------------------------- /hasura_discord_postgres/postgres/metadata/allow_list.yaml: -------------------------------------------------------------------------------- 1 | [] 2 | -------------------------------------------------------------------------------- /hasura_discord_postgres/postgres/metadata/api_limits.yaml: -------------------------------------------------------------------------------- 1 | {} 2 | -------------------------------------------------------------------------------- /hasura_discord_postgres/postgres/metadata/backend_configs.yaml: -------------------------------------------------------------------------------- 1 | dataconnector: 2 | athena: 3 | uri: http://super-connector.data-connector:8081/api/v1/athena 4 | mariadb: 5 | uri: http://super-connector.data-connector:8081/api/v1/mariadb 6 | mongodb: 7 | uri: 
http://mongodb-connector.data-connector:8081 8 | mysql8: 9 | uri: http://super-connector.data-connector:8081/api/v1/mysql 10 | oracle: 11 | uri: http://super-connector.data-connector:8081/api/v1/oracle 12 | snowflake: 13 | uri: http://super-connector.data-connector:8081/api/v1/snowflake 14 | -------------------------------------------------------------------------------- /hasura_discord_postgres/postgres/metadata/cron_triggers.yaml: -------------------------------------------------------------------------------- 1 | [] 2 | -------------------------------------------------------------------------------- /hasura_discord_postgres/postgres/metadata/databases/databases.yaml: -------------------------------------------------------------------------------- 1 | - name: postgres 2 | kind: postgres 3 | configuration: 4 | connection_info: 5 | database_url: 6 | from_env: HASURA_GRAPHQL_DATABASE_URL 7 | isolation_level: read-committed 8 | use_prepared_statements: false 9 | tables: "!include postgres/tables/tables.yaml" 10 | -------------------------------------------------------------------------------- /hasura_discord_postgres/postgres/metadata/databases/postgres/tables/public_COLLECTION_ENUM.yaml: -------------------------------------------------------------------------------- 1 | table: 2 | name: COLLECTION_ENUM 3 | schema: public 4 | is_enum: true 5 | array_relationships: 6 | - name: guild_forums 7 | using: 8 | foreign_key_constraint_on: 9 | column: forum_collection 10 | table: 11 | name: guild_forums 12 | schema: public 13 | - name: threads 14 | using: 15 | foreign_key_constraint_on: 16 | column: collection 17 | table: 18 | name: thread 19 | schema: public 20 | -------------------------------------------------------------------------------- /hasura_discord_postgres/postgres/metadata/databases/postgres/tables/public_configuration.yaml: -------------------------------------------------------------------------------- 1 | table: 2 | name: configuration 3 | schema: public 4 | 
array_relationships: 5 | - name: guild_forums 6 | using: 7 | foreign_key_constraint_on: 8 | column: guild_id 9 | table: 10 | name: guild_forums 11 | schema: public 12 | -------------------------------------------------------------------------------- /hasura_discord_postgres/postgres/metadata/databases/postgres/tables/public_guild_forums.yaml: -------------------------------------------------------------------------------- 1 | table: 2 | name: guild_forums 3 | schema: public 4 | object_relationships: 5 | - name: COLLECTION_ENUM 6 | using: 7 | foreign_key_constraint_on: forum_collection 8 | - name: configuration 9 | using: 10 | foreign_key_constraint_on: guild_id 11 | -------------------------------------------------------------------------------- /hasura_discord_postgres/postgres/metadata/databases/postgres/tables/public_message.yaml: -------------------------------------------------------------------------------- 1 | table: 2 | name: message 3 | schema: public 4 | object_relationships: 5 | - name: thread 6 | using: 7 | foreign_key_constraint_on: thread_id 8 | event_triggers: 9 | - name: new_message_event 10 | definition: 11 | enable_manual: false 12 | insert: 13 | columns: '*' 14 | retry_conf: 15 | interval_sec: 600 16 | num_retries: 3 17 | timeout_sec: 600 18 | webhook: '{{BACKEND_URL}}' 19 | headers: 20 | - name: X-API-KEY 21 | value_from_env: BACKEND_SECRET 22 | request_transform: 23 | method: POST 24 | query_params: {} 25 | template_engine: Kriti 26 | url: '{{$base_url}}/new_message_event/' 27 | version: 2 28 | -------------------------------------------------------------------------------- /hasura_discord_postgres/postgres/metadata/databases/postgres/tables/public_thread.yaml: -------------------------------------------------------------------------------- 1 | table: 2 | name: thread 3 | schema: public 4 | object_relationships: 5 | - name: COLLECTION_ENUM 6 | using: 7 | foreign_key_constraint_on: collection 8 | array_relationships: 9 | - name: messages 10 | 
using: 11 | foreign_key_constraint_on: 12 | column: thread_id 13 | table: 14 | name: message 15 | schema: public 16 | -------------------------------------------------------------------------------- /hasura_discord_postgres/postgres/metadata/databases/postgres/tables/tables.yaml: -------------------------------------------------------------------------------- 1 | - "!include public_COLLECTION_ENUM.yaml" 2 | - "!include public_configuration.yaml" 3 | - "!include public_guild_forums.yaml" 4 | - "!include public_message.yaml" 5 | - "!include public_thread.yaml" 6 | -------------------------------------------------------------------------------- /hasura_discord_postgres/postgres/metadata/graphql_schema_introspection.yaml: -------------------------------------------------------------------------------- 1 | disabled_for_roles: [] 2 | -------------------------------------------------------------------------------- /hasura_discord_postgres/postgres/metadata/inherited_roles.yaml: -------------------------------------------------------------------------------- 1 | [] 2 | -------------------------------------------------------------------------------- /hasura_discord_postgres/postgres/metadata/metrics_config.yaml: -------------------------------------------------------------------------------- 1 | {} 2 | -------------------------------------------------------------------------------- /hasura_discord_postgres/postgres/metadata/network.yaml: -------------------------------------------------------------------------------- 1 | {} 2 | -------------------------------------------------------------------------------- /hasura_discord_postgres/postgres/metadata/opentelemetry.yaml: -------------------------------------------------------------------------------- 1 | {} 2 | -------------------------------------------------------------------------------- /hasura_discord_postgres/postgres/metadata/query_collections.yaml: 
-------------------------------------------------------------------------------- 1 | [] 2 | -------------------------------------------------------------------------------- /hasura_discord_postgres/postgres/metadata/remote_schemas.yaml: -------------------------------------------------------------------------------- 1 | [] 2 | -------------------------------------------------------------------------------- /hasura_discord_postgres/postgres/metadata/rest_endpoints.yaml: -------------------------------------------------------------------------------- 1 | [] 2 | -------------------------------------------------------------------------------- /hasura_discord_postgres/postgres/metadata/version.yaml: -------------------------------------------------------------------------------- 1 | version: 3 2 | -------------------------------------------------------------------------------- /hasura_discord_postgres/postgres/migrations/postgres/1708450043631_init/up.sql: -------------------------------------------------------------------------------- 1 | SET check_function_bodies = false; 2 | CREATE EXTENSION IF NOT EXISTS pgcrypto WITH SCHEMA public; 3 | COMMENT ON EXTENSION pgcrypto IS 'cryptographic functions'; 4 | CREATE FUNCTION public.set_current_timestamp_updated_at() RETURNS trigger 5 | LANGUAGE plpgsql 6 | AS $$ 7 | DECLARE 8 | _new record; 9 | BEGIN 10 | _new := NEW; 11 | _new."updated_at" = NOW(); 12 | RETURN _new; 13 | END; 14 | $$; 15 | CREATE TABLE public."COLLECTION_ENUM" ( 16 | value text NOT NULL, 17 | comment text 18 | ); 19 | CREATE TABLE public.configuration ( 20 | guild_id bigint NOT NULL, 21 | logging_channel_id bigint NOT NULL, 22 | mod_role_id bigint NOT NULL, 23 | banned_user_ids bigint[] NOT NULL 24 | ); 25 | CREATE TABLE public.guild_forums ( 26 | guild_id bigint NOT NULL, 27 | forum_channel_id bigint NOT NULL, 28 | forum_collection text NOT NULL 29 | ); 30 | CREATE TABLE public.message ( 31 | thread_id text NOT NULL, 32 | message_id text NOT NULL, 33 | 
content text NOT NULL, 34 | from_bot boolean NOT NULL, 35 | created_at timestamp with time zone DEFAULT now() NOT NULL, 36 | updated_at timestamp with time zone DEFAULT now() NOT NULL, 37 | first_message boolean NOT NULL, 38 | mentions_bot boolean NOT NULL, 39 | sources text, 40 | processed boolean DEFAULT false NOT NULL 41 | ); 42 | CREATE TABLE public.thread ( 43 | created_at timestamp with time zone DEFAULT now() NOT NULL, 44 | updated_at timestamp with time zone DEFAULT now() NOT NULL, 45 | open boolean DEFAULT true NOT NULL, 46 | solved boolean DEFAULT false NOT NULL, 47 | thread_id text NOT NULL, 48 | title text, 49 | collection text NOT NULL, 50 | thread_controller_id text NOT NULL, 51 | author_id text NOT NULL, 52 | solved_votes integer DEFAULT 0 NOT NULL, 53 | failed_votes integer DEFAULT 0 NOT NULL 54 | ); 55 | ALTER TABLE ONLY public."COLLECTION_ENUM" 56 | ADD CONSTRAINT "COLLECTION_ENUM_pkey" PRIMARY KEY (value); 57 | ALTER TABLE ONLY public.configuration 58 | ADD CONSTRAINT configuration_pkey PRIMARY KEY (guild_id); 59 | ALTER TABLE ONLY public.guild_forums 60 | ADD CONSTRAINT guild_forums_pkey PRIMARY KEY (guild_id, forum_channel_id); 61 | ALTER TABLE ONLY public.message 62 | ADD CONSTRAINT message_pkey PRIMARY KEY (thread_id, message_id); 63 | ALTER TABLE ONLY public.thread 64 | ADD CONSTRAINT thread_pkey PRIMARY KEY (thread_id); 65 | ALTER TABLE ONLY public.thread 66 | ADD CONSTRAINT thread_thread_controller_id_key UNIQUE (thread_controller_id); 67 | CREATE TRIGGER set_public_message_updated_at BEFORE UPDATE ON public.message FOR EACH ROW EXECUTE FUNCTION public.set_current_timestamp_updated_at(); 68 | COMMENT ON TRIGGER set_public_message_updated_at ON public.message IS 'trigger to set value of column "updated_at" to current timestamp on row update'; 69 | CREATE TRIGGER set_public_thread_updated_at BEFORE UPDATE ON public.thread FOR EACH ROW EXECUTE FUNCTION public.set_current_timestamp_updated_at(); 70 | COMMENT ON TRIGGER 
set_public_thread_updated_at ON public.thread IS 'trigger to set value of column "updated_at" to current timestamp on row update'; 71 | ALTER TABLE ONLY public.guild_forums 72 | ADD CONSTRAINT guild_forums_forum_collection_fkey FOREIGN KEY (forum_collection) REFERENCES public."COLLECTION_ENUM"(value) ON UPDATE CASCADE ON DELETE CASCADE; 73 | ALTER TABLE ONLY public.guild_forums 74 | ADD CONSTRAINT guild_forums_guild_id_fkey FOREIGN KEY (guild_id) REFERENCES public.configuration(guild_id) ON UPDATE CASCADE ON DELETE CASCADE; 75 | ALTER TABLE ONLY public.message 76 | ADD CONSTRAINT message_thread_id_fkey FOREIGN KEY (thread_id) REFERENCES public.thread(thread_id) ON UPDATE CASCADE ON DELETE CASCADE; 77 | ALTER TABLE ONLY public.thread 78 | ADD CONSTRAINT thread_collection_fkey FOREIGN KEY (collection) REFERENCES public."COLLECTION_ENUM"(value) ON UPDATE CASCADE ON DELETE CASCADE; 79 | -------------------------------------------------------------------------------- /hasura_discord_qdrant/config.yaml: -------------------------------------------------------------------------------- 1 | log_level: INFO 2 | 3 | storage: 4 | # Where to store all the data 5 | storage_path: ./storage 6 | 7 | # Where to store snapshots 8 | snapshots_path: ./snapshots 9 | 10 | # Where to store temporary files 11 | # If null, temporary snapshot are stored in: storage/snapshots_temp/ 12 | temp_path: null 13 | 14 | # If true - point's payload will not be stored in memory. 15 | # It will be read from the disk every time it is requested. 16 | # This setting saves RAM by (slightly) increasing the response time. 17 | # Note: those payload values that are involved in filtering and are indexed - remain in RAM. 18 | on_disk_payload: true 19 | 20 | # Maximum number of concurrent updates to shard replicas 21 | # If `null` - maximum concurrency is used. 
22 | update_concurrency: null 23 | 24 | # Write-ahead-log related configuration 25 | wal: 26 | # Size of a single WAL segment 27 | wal_capacity_mb: 32 28 | 29 | # Number of WAL segments to create ahead of actual data requirement 30 | wal_segments_ahead: 0 31 | 32 | # Normal node - receives all updates and answers all queries 33 | node_type: "Normal" 34 | 35 | # Listener node - receives all updates, but does not answer search/read queries 36 | # Useful for setting up a dedicated backup node 37 | # node_type: "Listener" 38 | 39 | performance: 40 | # Number of parallel threads used for search operations. If 0 - auto selection. 41 | max_search_threads: 0 42 | 43 | # Max total number of threads, which can be used for running optimization processes across all collections. 44 | # Note: Each optimization thread will also use `max_indexing_threads` for index building. 45 | # So total number of threads used for optimization will be `max_optimization_threads * max_indexing_threads` 46 | max_optimization_threads: 1 47 | 48 | # Prevent DDoS of too many concurrent updates in distributed mode. 49 | # One external update usually triggers multiple internal updates, which breaks internal 50 | # timings. For example, the health check timing and consensus timing. 51 | # If null - auto selection. 52 | update_rate_limit: null 53 | 54 | # Limit for number of incoming automatic shard transfers per collection on this node, does not affect user-requested transfers. 55 | # The same value should be used on all nodes in a cluster. 56 | # Default is to allow 1 transfer. 57 | # If null - allow unlimited transfers. 58 | #incoming_shard_transfers_limit: 1 59 | 60 | # Limit for number of outgoing automatic shard transfers per collection on this node, does not affect user-requested transfers. 61 | # The same value should be used on all nodes in a cluster. 62 | # Default is to allow 1 transfer. 63 | # If null - allow unlimited transfers. 
64 | #outgoing_shard_transfers_limit: 1 65 | 66 | optimizers: 67 | # The minimal fraction of deleted vectors in a segment, required to perform segment optimization 68 | deleted_threshold: 0.2 69 | 70 | # The minimal number of vectors in a segment, required to perform segment optimization 71 | vacuum_min_vector_number: 1000 72 | 73 | # Target amount of segments optimizer will try to keep. 74 | # Real amount of segments may vary depending on multiple parameters: 75 | # - Amount of stored points 76 | # - Current write RPS 77 | # 78 | # It is recommended to select default number of segments as a factor of the number of search threads, 79 | # so that each segment would be handled evenly by one of the threads. 80 | # If `default_segment_number = 0`, will be automatically selected by the number of available CPUs 81 | default_segment_number: 0 82 | 83 | # Do not create segments larger this size (in KiloBytes). 84 | # Large segments might require disproportionately long indexation times, 85 | # therefore it makes sense to limit the size of segments. 86 | # 87 | # If indexation speed have more priority for your - make this parameter lower. 88 | # If search speed is more important - make this parameter higher. 89 | # Note: 1Kb = 1 vector of size 256 90 | # If not set, will be automatically selected considering the number of available CPUs. 91 | max_segment_size_kb: null 92 | 93 | # Maximum size (in KiloBytes) of vectors to store in-memory per segment. 94 | # Segments larger than this threshold will be stored as read-only memmaped file. 95 | # To enable memmap storage, lower the threshold 96 | # Note: 1Kb = 1 vector of size 256 97 | # To explicitly disable mmap optimization, set to `0`. 98 | # If not set, will be disabled by default. 99 | memmap_threshold_kb: null 100 | 101 | # Maximum size (in KiloBytes) of vectors allowed for plain index. 
102 | # Default value based on https://github.com/google-research/google-research/blob/master/scann/docs/algorithms.md 103 | # Note: 1Kb = 1 vector of size 256 104 | # To explicitly disable vector indexing, set to `0`. 105 | # If not set, the default value will be used. 106 | indexing_threshold_kb: 20000 107 | 108 | # Interval between forced flushes. 109 | flush_interval_sec: 5 110 | 111 | # Max number of threads, which can be used for optimization per collection. 112 | # Note: Each optimization thread will also use `max_indexing_threads` for index building. 113 | # So total number of threads used for optimization will be `max_optimization_threads * max_indexing_threads` 114 | # If `max_optimization_threads = 0`, optimization will be disabled. 115 | max_optimization_threads: 1 116 | 117 | # Default parameters of HNSW Index. Could be overridden for each collection or named vector individually 118 | hnsw_index: 119 | # Number of edges per node in the index graph. Larger the value - more accurate the search, more space required. 120 | m: 16 121 | # Number of neighbours to consider during the index building. Larger the value - more accurate the search, more time required to build index. 122 | ef_construct: 100 123 | # Minimal size (in KiloBytes) of vectors for additional payload-based indexing. 124 | # If payload chunk is smaller than `full_scan_threshold_kb` additional indexing won't be used - 125 | # in this case full-scan search should be preferred by query planner and additional indexing is not required. 126 | # Note: 1Kb = 1 vector of size 256 127 | full_scan_threshold_kb: 10000 128 | # Number of parallel threads used for background index building. If 0 - auto selection. 129 | max_indexing_threads: 0 130 | # Store HNSW index on disk. If set to false, index will be stored in RAM. Default: false 131 | on_disk: false 132 | # Custom M param for hnsw graph built for payload index. If not set, default M will be used. 
133 | payload_m: null 134 | 135 | 136 | service: 137 | 138 | # Maximum size of POST data in a single request in megabytes 139 | max_request_size_mb: 32 140 | 141 | # Number of parallel workers used for serving the api. If 0 - equal to the number of available cores. 142 | # If missing - Same as storage.max_search_threads 143 | max_workers: 0 144 | 145 | # Host to bind the service on 146 | host: 0.0.0.0 147 | 148 | # HTTP(S) port to bind the service on 149 | http_port: 6333 150 | 151 | # gRPC port to bind the service on. 152 | # If `null` - gRPC is disabled. Default: null 153 | # Comment to disable gRPC: 154 | grpc_port: 6334 155 | 156 | # Enable CORS headers in REST API. 157 | # If enabled, browsers would be allowed to query REST endpoints regardless of query origin. 158 | # More info: https://developer.mozilla.org/en-US/docs/Web/HTTP/CORS 159 | # Default: true 160 | enable_cors: true 161 | 162 | # Enable HTTPS for the REST and gRPC API 163 | enable_tls: false 164 | 165 | # Check user HTTPS client certificate against CA file specified in tls config 166 | verify_https_client_certificate: false 167 | 168 | # Set an api-key. 169 | # If set, all requests must include a header with the api-key. 170 | # example header: `api-key: ` 171 | # 172 | # If you enable this you should also enable TLS. 173 | # (Either above or via an external service like nginx.) 174 | # Sending an api-key over an unencrypted channel is insecure. 175 | # 176 | # Uncomment to enable. 177 | # api_key: your_secret_api_key_here 178 | 179 | # Set an api-key for read-only operations. 180 | # If set, all requests must include a header with the api-key. 181 | # example header: `api-key: ` 182 | # 183 | # If you enable this you should also enable TLS. 184 | # (Either above or via an external service like nginx.) 185 | # Sending an api-key over an unencrypted channel is insecure. 186 | # 187 | # Uncomment to enable. 
188 | # read_only_api_key: your_secret_read_only_api_key_here 189 | 190 | cluster: 191 | # Use `enabled: true` to run Qdrant in distributed deployment mode 192 | enabled: false 193 | 194 | # Configuration of the inter-cluster communication 195 | p2p: 196 | # Port for internal communication between peers 197 | port: 6335 198 | 199 | # Use TLS for communication between peers 200 | enable_tls: false 201 | 202 | # Configuration related to distributed consensus algorithm 203 | consensus: 204 | # How frequently peers should ping each other. 205 | # Setting this parameter to lower value will allow consensus 206 | # to detect disconnected nodes earlier, but too frequent 207 | # tick period may create significant network and CPU overhead. 208 | # We encourage you NOT to change this parameter unless you know what you are doing. 209 | tick_period_ms: 100 210 | 211 | 212 | # Set to true to prevent service from sending usage statistics to the developers. 213 | # Read more: https://qdrant.tech/documentation/guides/telemetry 214 | telemetry_disabled: false 215 | 216 | 217 | # TLS configuration. 218 | # Required if either service.enable_tls or cluster.p2p.enable_tls is true. 219 | tls: 220 | # Server certificate chain file 221 | cert: ./tls/cert.pem 222 | 223 | # Server private key file 224 | key: ./tls/key.pem 225 | 226 | # Certificate authority certificate file. 227 | # This certificate will be used to validate the certificates 228 | # presented by other nodes during inter-cluster communication. 229 | # 230 | # If verify_https_client_certificate is true, it will verify 231 | # HTTPS client certificate 232 | # 233 | # Required if cluster.p2p.enable_tls is true. 234 | ca_cert: ./tls/cacert.pem 235 | 236 | # TTL in seconds to reload certificate from disk, useful for certificate rotations. 237 | # Only works for HTTPS endpoints. Does not support gRPC (and intra-cluster communication). 238 | # If `null` - TTL is disabled. 
239 | cert_ttl: 3600 -------------------------------------------------------------------------------- /hasura_discord_qdrant/snapshots/v3.snapshot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hasura/hasura-discord-docs-bot/ae2083c4d2c04f29af47f980a9dcf579c172c772/hasura_discord_qdrant/snapshots/v3.snapshot -------------------------------------------------------------------------------- /images/image1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hasura/hasura-discord-docs-bot/ae2083c4d2c04f29af47f980a9dcf579c172c772/images/image1.gif -------------------------------------------------------------------------------- /images/image10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hasura/hasura-discord-docs-bot/ae2083c4d2c04f29af47f980a9dcf579c172c772/images/image10.png -------------------------------------------------------------------------------- /images/image11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hasura/hasura-discord-docs-bot/ae2083c4d2c04f29af47f980a9dcf579c172c772/images/image11.png -------------------------------------------------------------------------------- /images/image12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hasura/hasura-discord-docs-bot/ae2083c4d2c04f29af47f980a9dcf579c172c772/images/image12.png -------------------------------------------------------------------------------- /images/image13.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hasura/hasura-discord-docs-bot/ae2083c4d2c04f29af47f980a9dcf579c172c772/images/image13.gif -------------------------------------------------------------------------------- 
/images/image2.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hasura/hasura-discord-docs-bot/ae2083c4d2c04f29af47f980a9dcf579c172c772/images/image2.gif -------------------------------------------------------------------------------- /images/image3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hasura/hasura-discord-docs-bot/ae2083c4d2c04f29af47f980a9dcf579c172c772/images/image3.png -------------------------------------------------------------------------------- /images/image4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hasura/hasura-discord-docs-bot/ae2083c4d2c04f29af47f980a9dcf579c172c772/images/image4.png -------------------------------------------------------------------------------- /images/image5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hasura/hasura-discord-docs-bot/ae2083c4d2c04f29af47f980a9dcf579c172c772/images/image5.png -------------------------------------------------------------------------------- /images/image6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hasura/hasura-discord-docs-bot/ae2083c4d2c04f29af47f980a9dcf579c172c772/images/image6.png -------------------------------------------------------------------------------- /images/image7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hasura/hasura-discord-docs-bot/ae2083c4d2c04f29af47f980a9dcf579c172c772/images/image7.png -------------------------------------------------------------------------------- /images/image8.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hasura/hasura-discord-docs-bot/ae2083c4d2c04f29af47f980a9dcf579c172c772/images/image8.png -------------------------------------------------------------------------------- /images/image9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hasura/hasura-discord-docs-bot/ae2083c4d2c04f29af47f980a9dcf579c172c772/images/image9.png -------------------------------------------------------------------------------- /images/setup/add_bot_to_server.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hasura/hasura-discord-docs-bot/ae2083c4d2c04f29af47f980a9dcf579c172c772/images/setup/add_bot_to_server.png -------------------------------------------------------------------------------- /images/setup/bot_message.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hasura/hasura-discord-docs-bot/ae2083c4d2c04f29af47f980a9dcf579c172c772/images/setup/bot_message.png -------------------------------------------------------------------------------- /images/setup/bot_settings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hasura/hasura-discord-docs-bot/ae2083c4d2c04f29af47f980a9dcf579c172c772/images/setup/bot_settings.png -------------------------------------------------------------------------------- /images/setup/bot_talk.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hasura/hasura-discord-docs-bot/ae2083c4d2c04f29af47f980a9dcf579c172c772/images/setup/bot_talk.png -------------------------------------------------------------------------------- /images/setup/commands.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hasura/hasura-discord-docs-bot/ae2083c4d2c04f29af47f980a9dcf579c172c772/images/setup/commands.png -------------------------------------------------------------------------------- /images/setup/forum_channel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hasura/hasura-discord-docs-bot/ae2083c4d2c04f29af47f980a9dcf579c172c772/images/setup/forum_channel.png -------------------------------------------------------------------------------- /images/setup/generated_url.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hasura/hasura-discord-docs-bot/ae2083c4d2c04f29af47f980a9dcf579c172c772/images/setup/generated_url.png -------------------------------------------------------------------------------- /images/setup/hasura_console.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hasura/hasura-discord-docs-bot/ae2083c4d2c04f29af47f980a9dcf579c172c772/images/setup/hasura_console.png -------------------------------------------------------------------------------- /images/setup/hello.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hasura/hasura-discord-docs-bot/ae2083c4d2c04f29af47f980a9dcf579c172c772/images/setup/hello.png -------------------------------------------------------------------------------- /images/setup/hello_world.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hasura/hasura-discord-docs-bot/ae2083c4d2c04f29af47f980a9dcf579c172c772/images/setup/hello_world.png -------------------------------------------------------------------------------- /images/setup/logging_channel.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hasura/hasura-discord-docs-bot/ae2083c4d2c04f29af47f980a9dcf579c172c772/images/setup/logging_channel.png -------------------------------------------------------------------------------- /images/setup/name_application.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hasura/hasura-discord-docs-bot/ae2083c4d2c04f29af47f980a9dcf579c172c772/images/setup/name_application.png -------------------------------------------------------------------------------- /images/setup/new_application.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hasura/hasura-discord-docs-bot/ae2083c4d2c04f29af47f980a9dcf579c172c772/images/setup/new_application.jpeg -------------------------------------------------------------------------------- /images/setup/qdrant_dashboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hasura/hasura-discord-docs-bot/ae2083c4d2c04f29af47f980a9dcf579c172c772/images/setup/qdrant_dashboard.png -------------------------------------------------------------------------------- /images/setup/redoc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hasura/hasura-discord-docs-bot/ae2083c4d2c04f29af47f980a9dcf579c172c772/images/setup/redoc.png -------------------------------------------------------------------------------- /images/setup/reset_token.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hasura/hasura-discord-docs-bot/ae2083c4d2c04f29af47f980a9dcf579c172c772/images/setup/reset_token.jpeg -------------------------------------------------------------------------------- /images/setup/role_id.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hasura/hasura-discord-docs-bot/ae2083c4d2c04f29af47f980a9dcf579c172c772/images/setup/role_id.png -------------------------------------------------------------------------------- /images/setup/search.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hasura/hasura-discord-docs-bot/ae2083c4d2c04f29af47f980a9dcf579c172c772/images/setup/search.png -------------------------------------------------------------------------------- /images/setup/search_results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hasura/hasura-discord-docs-bot/ae2083c4d2c04f29af47f980a9dcf579c172c772/images/setup/search_results.png -------------------------------------------------------------------------------- /images/setup/server_id.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hasura/hasura-discord-docs-bot/ae2083c4d2c04f29af47f980a9dcf579c172c772/images/setup/server_id.png -------------------------------------------------------------------------------- /images/setup/server_settings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hasura/hasura-discord-docs-bot/ae2083c4d2c04f29af47f980a9dcf579c172c772/images/setup/server_settings.png -------------------------------------------------------------------------------- /images/setup/set_scopes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hasura/hasura-discord-docs-bot/ae2083c4d2c04f29af47f980a9dcf579c172c772/images/setup/set_scopes.png -------------------------------------------------------------------------------- /images/setup/swagger.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hasura/hasura-discord-docs-bot/ae2083c4d2c04f29af47f980a9dcf579c172c772/images/setup/swagger.png -------------------------------------------------------------------------------- /images/setup/test_post.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hasura/hasura-discord-docs-bot/ae2083c4d2c04f29af47f980a9dcf579c172c772/images/setup/test_post.png -------------------------------------------------------------------------------- /images/setup/token.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hasura/hasura-discord-docs-bot/ae2083c4d2c04f29af47f980a9dcf579c172c772/images/setup/token.png -------------------------------------------------------------------------------- /seed.json: -------------------------------------------------------------------------------- 1 | { 2 | "COLLECTION_ENUM": [ 3 | { 4 | "value": "v3" 5 | } 6 | ], 7 | "configuration": [ 8 | { 9 | "guild_id": 1204239405011832883, 10 | "logging_channel_id": 1204239405456297995, 11 | "mod_role_id": 1209640032747528202, 12 | "banned_user_ids": [] 13 | } 14 | ], 15 | "guild_forums": [ 16 | { 17 | "guild_id": 1204239405011832883, 18 | "forum_channel_id": 1204246613145419806, 19 | "forum_collection": "v3" 20 | } 21 | ] 22 | } -------------------------------------------------------------------------------- /seed.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | 4 | def load_json_data(file_path): 5 | with open(file_path, 'r') as file: 6 | return json.load(file) 7 | 8 | 9 | def format_array_for_copy(array): 10 | # Format the array as a PostgreSQL array string 11 | return '{' + ','.join(map(str, array)) + '}' 12 | 13 | 14 | def generate_copy_statements(data): 15 | # Generate COPY statements for COLLECTION_ENUM 16 | collection_enum = "COPY public.\"COLLECTION_ENUM\" (value, comment) 
FROM stdin;\n" 17 | for item in data["COLLECTION_ENUM"]: 18 | value = item["value"] 19 | collection_enum += f"{value}\t\\N\n" 20 | collection_enum += "\\.\n" 21 | 22 | # Generate COPY statements for configuration 23 | configuration = "COPY public.configuration (guild_id, logging_channel_id, mod_role_id, banned_user_ids) FROM stdin;\n" 24 | for item in data["configuration"]: 25 | guild_id = item["guild_id"] 26 | logging_channel_id = item["logging_channel_id"] 27 | mod_role_id = item["mod_role_id"] 28 | banned_user_ids = format_array_for_copy(item["banned_user_ids"]) 29 | configuration += f"{guild_id}\t{logging_channel_id}\t{mod_role_id}\t{banned_user_ids}\n" 30 | configuration += "\\.\n" 31 | 32 | # Generate COPY statements for guild_forums 33 | guild_forums = "COPY public.guild_forums (guild_id, forum_channel_id, forum_collection) FROM stdin;\n" 34 | for item in data["guild_forums"]: 35 | guild_id = item["guild_id"] 36 | forum_channel_id = item["forum_channel_id"] 37 | forum_collection = item["forum_collection"] 38 | guild_forums += f"{guild_id}\t{forum_channel_id}\t{forum_collection}\n" 39 | guild_forums += "\\.\n" 40 | 41 | return collection_enum + configuration + guild_forums 42 | 43 | 44 | def create_seed_sql_file(json_file_path, output_file_path): 45 | data = load_json_data(json_file_path) 46 | copy_statements = generate_copy_statements(data) 47 | with open(output_file_path, 'w') as file: 48 | file.write(copy_statements) 49 | print(f"SQL seed file created at {output_file_path}") 50 | 51 | 52 | # Example usage 53 | if __name__ == "__main__": 54 | json_file_path = 'seed.json' # Update this path to your JSON file location 55 | output_file_path = 'hasura_discord_postgres/data/seed.sql' # Update this path to where you want the SQL file saved 56 | create_seed_sql_file(json_file_path, output_file_path) 57 | --------------------------------------------------------------------------------