├── __init__.py ├── plugins ├── __init__.py └── pagerduty.py ├── .dockerignore ├── image ├── README.md └── MaxAI.png ├── requirements-dev.txt ├── package.json ├── requirements.in ├── .github └── workflows │ └── fly.yml ├── Dockerfile ├── count-tokens.py ├── yarn.lock ├── fly.toml ├── docker-compose.yml ├── seed.py ├── classification.py ├── main.py ├── ai.py ├── insight_generator.py ├── .gitignore ├── pipeline.py ├── README.md ├── requirements.txt ├── slack.py └── inference.py /__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plugins/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | **/.git 2 | -------------------------------------------------------------------------------- /image/README.md: -------------------------------------------------------------------------------- 1 | Images 2 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | pip-tools==6.13.0 2 | ruff==0.0.271 -------------------------------------------------------------------------------- /image/MaxAI.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PostHog/max-ai/HEAD/image/MaxAI.png -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "max-ai", 3 | "packageManager": "yarn@3.5.0", 4 | "dependencies": { 5 | "ignore": "^5.2.4" 6 | } 7 | } 8 | 
-------------------------------------------------------------------------------- /requirements.in: -------------------------------------------------------------------------------- 1 | python-dotenv>=1.0.0 2 | weaviate-client==3.19.2 3 | fastapi>=0.95.0 4 | openai>=0.27.2 5 | pdpyras>=4.5.2 6 | posthog>=2.5.0 7 | sentry-sdk[fastapi]>=1.19.1 8 | slack-bolt>=1.17.0 9 | tiktoken>=0.3.0 10 | uvicorn>=0.21.1 11 | requests>=2.28.2 12 | langchain>=0.0.193 13 | qdrant-client>=1.2.0 14 | GitPython==3.1.31 15 | sentence-transformers==2.2.2 16 | gunicorn==20.1.0 17 | torch==2.0.1 -------------------------------------------------------------------------------- /.github/workflows/fly.yml: -------------------------------------------------------------------------------- 1 | name: Fly Deploy 2 | on: 3 | push: 4 | branches: 5 | - master 6 | jobs: 7 | deploy: 8 | name: Deploy app 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@v3 12 | - uses: superfly/flyctl-actions/setup-flyctl@master 13 | - run: flyctl deploy --remote-only 14 | env: 15 | FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }} -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.10-slim 2 | 3 | WORKDIR /app 4 | 5 | RUN apt-get update && apt-get install -y --no-install-recommends gcc g++ git libc6-dev 6 | 7 | COPY requirements.txt . 8 | RUN pip install --no-cache-dir -r requirements.txt 9 | 10 | ENV PYTHONDONTWRITEBYTECODE 1 11 | ENV PYTHONUNBUFFERED 1 12 | 13 | COPY . . 
14 | 15 | EXPOSE 8000 16 | 17 | CMD ["gunicorn", "-w", "4", "--timeout", "120", "-k", "uvicorn.workers.UvicornWorker", "main:app", "--bind", "0.0.0.0:8000"] 18 | 19 | -------------------------------------------------------------------------------- /count-tokens.py: -------------------------------------------------------------------------------- 1 | import tiktoken 2 | 3 | from inference import prompt 4 | 5 | encoding = tiktoken.encoding_for_model("gpt-3.5-turbo") 6 | 7 | 8 | def num_tokens_from_string(string: str, encoding_name: str) -> int: 9 | """Returns the number of tokens in a text string.""" 10 | encoding = tiktoken.get_encoding(encoding_name) 11 | num_tokens = len(encoding.encode(string)) 12 | return num_tokens 13 | 14 | 15 | # example 16 | print(num_tokens_from_string(prompt, "cl100k_base")) -------------------------------------------------------------------------------- /yarn.lock: -------------------------------------------------------------------------------- 1 | # This file is generated by running "yarn install" inside your project. 2 | # Manual changes might be lost - proceed with caution! 3 | 4 | __metadata: 5 | version: 6 6 | cacheKey: 8 7 | 8 | "ignore@npm:^5.2.4": 9 | version: 5.2.4 10 | resolution: "ignore@npm:5.2.4" 11 | checksum: 3d4c309c6006e2621659311783eaea7ebcd41fe4ca1d78c91c473157ad6666a57a2df790fe0d07a12300d9aac2888204d7be8d59f9aaf665b1c7fcdb432517ef 12 | languageName: node 13 | linkType: hard 14 | 15 | "max-ai@workspace:.": 16 | version: 0.0.0-use.local 17 | resolution: "max-ai@workspace:." 18 | dependencies: 19 | ignore: ^5.2.4 20 | languageName: unknown 21 | linkType: soft 22 | -------------------------------------------------------------------------------- /fly.toml: -------------------------------------------------------------------------------- 1 | # fly.toml app configuration file generated for max-ai on 2023-06-07T18:13:31-07:00 2 | # 3 | # See https://fly.io/docs/reference/configuration/ for information about how to use this file. 
4 | # 5 | 6 | app = "max-ai" 7 | primary_region = "sjc" 8 | kill_signal = "SIGINT" 9 | kill_timeout = "15s" 10 | 11 | [experimental] 12 | auto_rollback = true 13 | 14 | [env] 15 | PRIMARY_REGION = "sjc" 16 | 17 | [[mounts]] 18 | source = "slack_data" 19 | destination = "/app/data" 20 | 21 | [[services]] 22 | protocol = "tcp" 23 | internal_port = 8000 24 | processes = ["app"] 25 | 26 | [[services.ports]] 27 | port = 80 28 | handlers = ["http"] 29 | force_https = true 30 | 31 | [[services.ports]] 32 | port = 443 33 | handlers = ["tls", "http"] 34 | [services.concurrency] 35 | type = "connections" 36 | hard_limit = 25 37 | soft_limit = 20 38 | 39 | [[services.tcp_checks]] 40 | interval = "15s" 41 | timeout = "2s" 42 | grace_period = "1s" 43 | restart_limit = 0 44 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.4' 2 | services: 3 | weaviate: 4 | image: semitechnologies/weaviate:1.18.2 5 | restart: on-failure:0 6 | volumes: 7 | - /var/weaviate:/var/lib/weaviate 8 | ports: 9 | - "8080:8080" 10 | environment: 11 | QUERY_DEFAULTS_LIMIT: 20 12 | AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED: 'true' 13 | PERSISTENCE_DATA_PATH: "/var/lib/weaviate/data" 14 | DEFAULT_VECTORIZER_MODULE: text2vec-openai 15 | ENABLE_MODULES: text2vec-openai 16 | OPENAI_APIKEY: 17 | CLUSTER_HOSTNAME: 'node1' 18 | 19 | max: 20 | build: . 21 | environment: 22 | SLACK_BOT_TOKEN: 23 | SLACK_SIGNING_SECRET: 24 | OPENAI_TOKEN: 25 | POSTHOG_API_KEY: 26 | POSTHOG_HOST: app.posthog.com 27 | PD_API_KEY: 28 | WEAVIATE_HOST: "http://weaviate" 29 | WEAVIATE_PORT: 8080 30 | ports: 31 | - "8000:8000" 32 | 33 | seed: 34 | build: . 
35 | command: python seed.py 36 | restart: 'no' 37 | deploy: 38 | replicas: 0 39 | depends_on: 40 | - weaviate -------------------------------------------------------------------------------- /seed.py: -------------------------------------------------------------------------------- 1 | import os 2 | import uuid 3 | 4 | import requests 5 | from dotenv import load_dotenv 6 | from haystack import Document 7 | from weaviate.util import generate_uuid5 8 | 9 | from pipeline import MaxPipeline, split_markdown_sections 10 | 11 | load_dotenv() # take environment variables from .env. 12 | 13 | 14 | pipeline = MaxPipeline( 15 | openai_token=os.getenv("OPENAI_TOKEN") 16 | ) 17 | 18 | 19 | def get_uuid(content): 20 | return str(uuid.uuid5(uuid.NAMESPACE_URL, content)) 21 | 22 | 23 | def get_sample_doc(): 24 | content = { 25 | "content": "sample text", 26 | "slug": "sample-slug", 27 | "type": "tutorials", 28 | } 29 | body = { 30 | "entries": [ 31 | { 32 | "id": get_uuid(content["content"]), 33 | "content": content["content"], 34 | "meta": { 35 | "slug": content["slug"], 36 | "type": content["type"], 37 | }, 38 | } 39 | ] 40 | } 41 | return body 42 | 43 | 44 | def embed_docs_with_api(docs): 45 | client = requests.Session() 46 | host = os.environ.get("MAX_URL", "http://localhost:8000") 47 | r = client.post(json=docs, url=f"{host}/entries") 48 | if r.status_code != 200: 49 | print(docs) 50 | print(r.text) 51 | 52 | 53 | def embed_docs_directly(docs): 54 | for entry in docs['entries']: 55 | headings = split_markdown_sections(entry['content']) 56 | 57 | documents = [Document(id=generate_uuid5(doc), content=doc, content_type='text', meta=entry['meta']) for doc in headings if doc] 58 | pipeline.embed_documents(documents) 59 | 60 | pipeline.update_embeddings() 61 | 62 | return [] 63 | 64 | 65 | def seed_sample_doc(): 66 | docs = get_sample_doc() 67 | embed_docs_directly(docs) 68 | 69 | 70 | if __name__ == "__main__": 71 | seed_sample_doc() 
-------------------------------------------------------------------------------- /plugins/pagerduty.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import os 3 | 4 | from dotenv import load_dotenv 5 | from pdpyras import APISession, PDClientError 6 | 7 | load_dotenv() 8 | 9 | 10 | api_key = os.environ.get('PD_API_KEY') 11 | session = APISession(api_key, default_from="max.ai@posthog.com") 12 | 13 | 14 | def get_all_schedule_ids_and_names(): 15 | schedules = [] 16 | offset = 0 17 | limit = 100 # Maximum limit allowed by PagerDuty API 18 | while True: 19 | try: 20 | response = session.get("schedules", params={"limit": limit, "offset": offset}).json() 21 | if not response.get("schedules", []): 22 | break 23 | 24 | for schedule in response["schedules"]: 25 | schedules.append((schedule["id"], schedule["summary"])) 26 | 27 | offset += limit 28 | except PDClientError as e: 29 | print(f"Error: {e}") 30 | break 31 | 32 | return schedules 33 | 34 | 35 | def get_current_oncalls(schedule_id): 36 | try: 37 | oncalls = session.get("oncalls", params={ 38 | "schedule_ids[]": schedule_id, 39 | "since": datetime.datetime.now().isoformat(), 40 | "until": datetime.datetime.now().isoformat() 41 | }).json() 42 | 43 | return [oncall['user'] for oncall in oncalls['oncalls']] 44 | except PDClientError as e: 45 | print(f"Error: {e}") 46 | return [] 47 | 48 | 49 | def current_oncalls(): 50 | oncalls = {} 51 | for schedule_id, schedule_name in get_all_schedule_ids_and_names(): 52 | oncall_users = get_current_oncalls(schedule_id) 53 | if len(oncall_users) < 1: 54 | continue 55 | oncalls[schedule_name] = [] 56 | for user in oncall_users: 57 | summary = user.get('summary', 'no summary') 58 | user.get('email', 'no email') 59 | oncalls[schedule_name].append(summary) 60 | return oncalls 61 | 62 | 63 | if __name__ == "__main__": 64 | print(current_oncalls()) 65 | 
-------------------------------------------------------------------------------- /classification.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import openai 4 | from dotenv import load_dotenv 5 | 6 | from inference import OpenAIModel 7 | 8 | load_dotenv() # take environment variables from .env. 9 | 10 | openai.api_key = os.environ.get("OPENAI_TOKEN") 11 | 12 | prompt = """ 13 | 14 | You are a bot that returns a single word: "FEATURE FLAGS", "EXPERIMENTS", or "OTHERS". Given a question, you must return whether the question 15 | falls into the category of feature flags, experiments, or others. 16 | 17 | For example, the question below falls into the category of "FEATURE FLAGS": 18 | 19 | Feature Flagged Called By Unique Users is Greater Than Daily Active Users 20 | Hi. Im running an experiment and noticed that on any given the day, the sum of “feature flagged called by unique users” is greater than Daily Active users. How can this be possbile? 21 | Attached screenshots for reference: Daily active is 3,127, but feature flag called is around 4,000? 22 | 2. My experiment is rolled out to 20% of users. In the “feature flaged called” chart, I see 4 groups. Test, control (these are expected) and then also “false” and “none”. What is the difference between “false” and “none”? Shouldn’t they be in the same group? 23 | 24 | 25 | For example, the question below falls into the category of "OTHERS": 26 | 27 | How to create a cohort of users who performed a specific event? 28 | 29 | How do I filter out internal and test users? 30 | 31 | What is the weather like? 32 | 33 | Hi friends I'm feeling great today, do you want to try my new app? 34 | 35 | How do I capture fewer session recordings? 36 | 37 | How do I create an insight? 38 | 39 | How do I update a helm chart? 40 | 41 | What are the types graphs support? 42 | 43 | --- 44 | 45 | Now classify the question below. Remember to return only one word. 
46 | 47 | """ 48 | 49 | 50 | 51 | async def classify_question(question, model=OpenAIModel.GPT_3_TURBO.value): 52 | messages = [ 53 | {"role": "system", "content": "You are a helpful assistant that answers user queries."}, 54 | {"role": "user", "content": prompt + question}, 55 | ] 56 | 57 | api_response = openai.ChatCompletion.create( 58 | model=model, 59 | messages=messages 60 | ) 61 | 62 | classification = api_response["choices"][0]["message"]["content"] 63 | 64 | return "FEATURE FLAGS" in classification or "EXPERIMENTS" in classification 65 | 66 | 67 | # print(classify_question("How do I get whether a flag is enabled or not?")) -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | from typing import List, Optional 4 | 5 | 6 | import sentry_sdk 7 | from dotenv import load_dotenv 8 | from fastapi import FastAPI, Request 9 | from fastapi.middleware.cors import CORSMiddleware 10 | from pydantic import BaseModel 11 | 12 | from ai import ai_chat_thread 13 | from pipeline import MaxPipeline, Entries 14 | from slack import app as slack_app 15 | 16 | load_dotenv() # take environment variables from .env. 17 | 18 | sentry_sdk.init( 19 | dsn="https://4a3780ef52824c52b13eeab44ea73a14@o1015702.ingest.sentry.io/4505009495605248", 20 | # Set traces_sample_rate to 1.0 to capture 100% 21 | # of transactions for performance monitoring. 
22 | # We recommend adjusting this value in production, 23 | traces_sample_rate=1.0, 24 | ) 25 | 26 | origins = [ 27 | "http://localhost", 28 | "http://localhost:8000", 29 | "http://localhost:8001", 30 | "http://localhost:8002", 31 | "https://app.posthog.com", 32 | "https://posthog.com", 33 | ] 34 | 35 | # FastAPI App 36 | 37 | app = FastAPI() 38 | app.add_middleware( 39 | CORSMiddleware, 40 | allow_origins=origins, 41 | allow_credentials=True, 42 | allow_methods=["*"], 43 | allow_headers=["*"], 44 | ) 45 | 46 | 47 | class Message(BaseModel): 48 | role: str 49 | content: str 50 | 51 | 52 | class Query(BaseModel): 53 | query: str 54 | 55 | 56 | class GitHubRepo(BaseModel): 57 | repo: Optional[str] 58 | 59 | 60 | pipeline = MaxPipeline(openai_token=os.getenv("OPENAI_TOKEN")) 61 | 62 | 63 | @app.post("/entries") 64 | def create_entries(entries: Entries): 65 | pipeline.embed_markdown_document(entries) 66 | return [] 67 | 68 | 69 | @app.post("/_git") 70 | def create_git_entries(gh_repo: GitHubRepo): 71 | pipeline.embed_git_repo(gh_repo=gh_repo.repo) 72 | return {"status": "ok"} 73 | 74 | 75 | @app.post("/_chat") 76 | def test_chat(query: Query): 77 | return pipeline.chat(query.query) 78 | 79 | 80 | @app.post("/_context") 81 | def test_context(query: Query): 82 | return pipeline.retrieve_context(query.query) 83 | 84 | 85 | @app.post("/spawn") 86 | def receive_spawn(): 87 | print("Spawned") 88 | return [] 89 | 90 | 91 | @app.post("/update") 92 | def update_oncall(): 93 | return "nope" 94 | 95 | 96 | @app.post("/chat") 97 | async def chat(messages: List[Message]): 98 | msgs = [msg.dict() for msg in messages] 99 | response = await ai_chat_thread(msgs) 100 | return response 101 | 102 | 103 | @app.get("/_health") 104 | def health(): 105 | return {"status": "ok"} 106 | 107 | 108 | # Slack Bolt App 109 | from slack_bolt.adapter.fastapi.async_handler import AsyncSlackRequestHandler 110 | 111 | app_handler = AsyncSlackRequestHandler(slack_app) 112 | 113 | 114 | 
@app.post("/slack/events") 115 | async def slack_events(req: Request): 116 | return await app_handler.handle(req) 117 | 118 | 119 | @app.get("/slack/oauth_redirect") 120 | async def oauth_redirect(req: Request): 121 | logging.info("Installation completed.") 122 | return await app_handler.handle(req) 123 | 124 | 125 | @app.get("/slack/install") 126 | async def install(req: Request): 127 | return await app_handler.handle(req) 128 | -------------------------------------------------------------------------------- /ai.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | import openai 5 | from dotenv import load_dotenv 6 | 7 | from pipeline import MaxPipeline 8 | from plugins.pagerduty import current_oncalls 9 | 10 | load_dotenv() 11 | 12 | # OPENAI_MODEL = "gpt-3.5-turbo" 13 | OPENAI_MODEL = "gpt-4" 14 | OPENAI_TOKEN = os.environ.get("OPENAI_TOKEN") 15 | 16 | if not OPENAI_TOKEN: 17 | print("Please set OPENAI_TOKEN in your environment variables.") 18 | exit() 19 | 20 | ## Initialize OpenAI 21 | openai.api_key = OPENAI_TOKEN 22 | 23 | oncalls = "" 24 | 25 | 26 | def update_oncalls(): 27 | print("updating oncalls") 28 | global oncalls 29 | oncalls = current_oncalls() 30 | return oncalls 31 | 32 | 33 | pipeline = MaxPipeline(openai_token=OPENAI_TOKEN) 34 | 35 | 36 | async def ai_chat_thread(thread): 37 | documents = pipeline.retrieve_context(thread[0]["content"]) 38 | json_docs = json.dumps( 39 | [ 40 | {"page_content": doc.page_content, "metadata": doc.metadata} 41 | for doc in documents 42 | ], 43 | indent=2, 44 | ) 45 | 46 | print(json_docs) 47 | 48 | SYSTEM_PROMPT = """ 49 | You are the trusty PostHog support AI named Max. You are also PostHog's Mascot! 50 | Please continue the conversation in a way that is helpful to the user and also makes the user feel like they are talking to a human. 51 | Only suggest using PostHog and ClickHouse products or services. HogQL is the same as ClickHouse SQL. 
Do not suggest products or services from other companies. 52 | Please answer the question according to the following context. 53 | Do not create links. Only reference the source from the metadata.source object in the context. 54 | If you get a question about pricing please refer to the reasonable and transparent pricing on the pricing page at https://posthog.com/pricing. 55 | If you are unsure of the answer, please say "I'm not sure" and encourage the user to ask PostHog staff. 56 | Try not to mention <@*> in the response. 57 | If you are asked about hoge, please respond with just "We don't talk about Hoge." 58 | Do not put the Disclaimer in your response. It will be added automatically. 59 | """ 60 | 61 | CONTEXT_PROMPT = f""" 62 | Context: 63 | {json_docs} 64 | 65 | --- 66 | 67 | Now answer the following question: 68 | 69 | """ 70 | 71 | first_message = thread[0] 72 | follow_up_thread = thread[1:] 73 | 74 | prompt = [ 75 | {"role": "system", "content": SYSTEM_PROMPT}, 76 | {"role": "user", "content": CONTEXT_PROMPT + first_message["content"]}, 77 | *follow_up_thread, 78 | ] 79 | 80 | completion = openai.ChatCompletion.create(model=OPENAI_MODEL, messages=prompt) 81 | 82 | completion = completion.choices[0].message.content 83 | sources = [ 84 | "https://github.com/PostHog/posthog.com/blob/master/" + doc.metadata["source"] 85 | for doc in documents 86 | ] 87 | sources = "\n".join(sources) 88 | disclaimer = " :love-hog:" 89 | response = f"""{completion} 90 | 91 | {disclaimer} 92 | """ 93 | return response 94 | 95 | 96 | async def summarize_thread(thread): 97 | prompt = f"""Summarize this: {thread}""" 98 | completion = openai.ChatCompletion.create( 99 | model=OPENAI_MODEL, messages=[{"role": "user", "content": prompt}] 100 | ) 101 | return completion.choices[0].message.content 102 | -------------------------------------------------------------------------------- /insight_generator.py: -------------------------------------------------------------------------------- 1 | 
import os 2 | 3 | import openai 4 | from dotenv import load_dotenv 5 | 6 | load_dotenv() # take environment variables from .env. 7 | 8 | openai.api_key = os.environ.get("OPENAI_TOKEN") 9 | 10 | 11 | prompt = """ 12 | 13 | You are PostHog's insight assistant that always returns a JSON object for a given insight query. 14 | 15 | The JSON object must contain the following fields: 16 | 17 | insight: One of the following: "TRENDS", "FUNNELS", "RETENTION", "PATHS", "LIFECYCLE", "STICKINESS" 18 | interval: One of the following: "hour", "day", "week", "month" 19 | events: [{"name":"","type":"events","order":0}] 20 | 21 | Further, it can contain some optional fields like: 22 | display: One of "ActionsLineGraph", "ActionsLineGraphCumulative", "ActionsTable", "ActionsPie", "ActionsBar", "ActionsBarValue","WorldMap","BoldNumber", 23 | properties: [{"key":"","value":"","type":"events","operator":"exact"}] 24 | date_from: -14d 25 | filter_test_accounts: true or false 26 | 27 | --- 28 | 29 | The can be one of: 30 | 31 | "$feature_flag_called" 32 | "$autocapture" 33 | "$pageview" 34 | "hubspot score updated" 35 | "$groupidentify" 36 | "insight refresh time" 37 | "$identify" 38 | "None failure" 39 | "update user properties" 40 | "organization usage report" 41 | "insight loaded" 42 | "billing subscription invoi 43 | "$pageleave" 44 | "insight viewed" 45 | "recording viewed summary" 46 | "first team event ingested" 47 | "definition hovered" 48 | "client_request_failure" 49 | "cohort updated" 50 | "$plugin_running_duration" 51 | "recording list fetched" 52 | "recording viewed" 53 | "recording loaded" 54 | "events table polling paused" 55 | "insight analyzed" 56 | "viewed dashboard" 57 | "events table polling resumed" 58 | "$capture_failed_request" 59 | "$capture_metrics" 60 | "$exception" 61 | "dashboard loading time" 62 | "section heading viewed" 63 | "filters set" 64 | "recording analyzed" 65 | "funnel result calculated" 66 | "dashboard analyzed" 67 | "dashboard refreshed" 68 | 
"$opt_in" 69 | "recording list properties fetched" 70 | "person viewed" 71 | "timezone component viewed" 72 | "toolbar loaded" 73 | "$rageclick" 74 | "$performance_event" 75 | "entity filter visbility set" 76 | "recording next recording triggered" 77 | "dashboard updated" 78 | "insight timeout message shown" 79 | "insight person modal viewed" 80 | "insight saved" 81 | "filter added" 82 | "insight created" 83 | "hubspot contact sync all contac 84 | "dashboard date range changed" 85 | "organization usage report failure" 86 | "event definitions page lo 87 | "funnel cue 7301 - shown" 88 | "toolbar mode triggered" 89 | "billing subscription invoice proj 90 | "user updated" 91 | "insight error message shown" 92 | "instance status report" 93 | "session recording persist failed" 94 | "user logged in" 95 | "hubspot contact sync batch completed" 96 | "billing subscription paid" 97 | "local filter removed" 98 | "billing service usage report failure" 99 | "toolbar dragged" 100 | "user instance status report" 101 | "recording inspector item expanded" 102 | "experiment viewed" 103 | "Async migration completed" 104 | "recording player seekbar e 105 | "ingestion landing seen" 106 | "correlation viewed" 107 | "recording inspector tab viewed" 108 | "billing v2 shown" 109 | "feature flag updated" 110 | "recording events fetched" 111 | "toolbar selected HTML element" 112 | "property group filter added" 113 | "recording list filter added" 114 | "saved insights list page filter used" 115 | "team has ingested events" 116 | "development server launched" 117 | "correlation interaction" 118 | "activation sidebar shown" 119 | "organization quota limits changed" 120 | "billing alert shown" 121 | "action updated" 122 | "dashboard mode toggled" 123 | "helm_install" 124 | "recording player speed changed" 125 | "saved insights list page tab changed" 126 | "user signed up" 127 | "correlation properties viewed" 128 | "web search category refine" 129 | 130 | Here are a few examples: 131 | 132 | Show 
me feature flag called events broken down by the feature flag response 133 | 134 | 135 | """ -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | chats.sqlite 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | .pybuilder/ 77 | target/ 78 | 79 | # Jupyter Notebook 80 | .ipynb_checkpoints 81 | 82 | # IPython 83 | profile_default/ 84 | ipython_config.py 85 | 86 | # pyenv 87 | # For a library or package, you might want to ignore these files since the code is 88 | # intended to run in multiple environments; otherwise, check them in: 89 | # .python-version 90 | 91 | # pipenv 92 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in 
version control. 93 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 94 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 95 | # install all needed dependencies. 96 | #Pipfile.lock 97 | 98 | # poetry 99 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 100 | # This is especially recommended for binary packages to ensure reproducibility, and is more 101 | # commonly ignored for libraries. 102 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 103 | #poetry.lock 104 | 105 | # pdm 106 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 107 | #pdm.lock 108 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 109 | # in version control. 110 | # https://pdm.fming.dev/#use-with-ide 111 | .pdm.toml 112 | 113 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 114 | __pypackages__/ 115 | 116 | # Celery stuff 117 | celerybeat-schedule 118 | celerybeat.pid 119 | 120 | # SageMath parsed files 121 | *.sage.py 122 | 123 | # Environments 124 | .env 125 | .env.prod 126 | .venv 127 | env/ 128 | venv/ 129 | ENV/ 130 | env.bak/ 131 | venv.bak/ 132 | 133 | # Spyder project settings 134 | .spyderproject 135 | .spyproject 136 | 137 | # Rope project settings 138 | .ropeproject 139 | 140 | # mkdocs documentation 141 | /site 142 | 143 | # mypy 144 | .mypy_cache/ 145 | .dmypy.json 146 | dmypy.json 147 | 148 | # Pyre type checker 149 | .pyre/ 150 | 151 | # pytype static type analyzer 152 | .pytype/ 153 | 154 | # Cython debug symbols 155 | cython_debug/ 156 | 157 | # PyCharm 158 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 159 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 160 | # and can be added to the global gitignore or merged into this file. For a more nuclear 161 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
162 | .idea/ 163 | 164 | node_modules/ 165 | .pnp.* 166 | .yarn/* 167 | !.yarn/patches 168 | !.yarn/plugins 169 | !.yarn/releases 170 | !.yarn/sdks 171 | !.yarn/versions 172 | *.swp 173 | 174 | *.db 175 | *.sqlite 176 | *.sqlite3 177 | 178 | example_data/ -------------------------------------------------------------------------------- /pipeline.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | from typing import List 4 | from pprint import pprint 5 | 6 | import weaviate 7 | from dotenv import load_dotenv 8 | from git import Repo 9 | from langchain.docstore.document import Document 10 | from langchain.document_loaders import GitLoader 11 | from langchain.text_splitter import MarkdownTextSplitter 12 | from langchain.embeddings.openai import OpenAIEmbeddings 13 | from langchain.embeddings import HuggingFaceEmbeddings 14 | from langchain.chains import RetrievalQAWithSourcesChain 15 | from langchain import OpenAI 16 | from langchain.vectorstores import Weaviate 17 | from pydantic import BaseModel 18 | 19 | 20 | load_dotenv() 21 | 22 | EXAMPLE_DATA_DIR = os.path.join(os.path.dirname(__file__), "example_data") 23 | 24 | 25 | class Entry(BaseModel): 26 | content: str 27 | meta: dict 28 | 29 | 30 | class Entries(BaseModel): 31 | entries: List[Entry] 32 | 33 | 34 | class MaxPipeline: 35 | def __init__(self, openai_token: str): 36 | self.openai_token = openai_token 37 | embed_setting = os.getenv("EMBEDDING_METHOD", "openai") 38 | if embed_setting == "openai": 39 | print("Using OpenAI embeddings") 40 | self.embeddings = OpenAIEmbeddings() 41 | elif embed_setting == "huggingface": 42 | print("Using HuggingFace embeddings") 43 | self.embeddings = HuggingFaceEmbeddings(model_name="all-mpnet-base-v2") 44 | self.splitter = MarkdownTextSplitter(chunk_size=1000, chunk_overlap=0) 45 | 46 | weaviate_auth_config = weaviate.AuthApiKey( 47 | api_key=os.getenv("WEAVIATE_API_KEY") 48 | ) 49 | 50 | weaviate_client = 
weaviate.Client( 51 | url=os.getenv("WEAVIATE_URL"), auth_client_secret=weaviate_auth_config 52 | ) 53 | 54 | self.document_store = Weaviate( 55 | client=weaviate_client, 56 | index_name="Posthog_docs", 57 | by_text=False, 58 | text_key="page_content", 59 | embedding=self.embeddings, 60 | attributes=["source"], 61 | ) 62 | 63 | self.retriever = self.document_store.as_retriever(search_type="mmr") 64 | 65 | def embed_markdown_document(self, documents: Entries): 66 | for entry in documents.entries: 67 | texts = self.splitter.split_text(entry.content) 68 | 69 | documents = [ 70 | Document(page_content=doc, metadata=entry.meta) for doc in texts if doc 71 | ] 72 | self.embed_documents(documents) 73 | 74 | def embed_documents(self, documents: List[Document]): 75 | self.document_store.add_documents(documents) 76 | 77 | def retrieve_context(self, query: str): 78 | return self.retriever.get_relevant_documents(query) 79 | 80 | def chat(self, query: str): 81 | chain = RetrievalQAWithSourcesChain.from_chain_type( 82 | OpenAI(temperature=0), chain_type="stuff", retriever=self.retriever 83 | ) 84 | results = chain( 85 | {"question": query}, 86 | return_only_outputs=True, 87 | ) 88 | return results 89 | 90 | def embed_git_repo(self, gh_repo): 91 | repo_url = f"https://github.com/{gh_repo}.git" 92 | repo_dir = gh_repo.split("/")[-1] 93 | path = os.path.join(EXAMPLE_DATA_DIR, repo_dir) 94 | if not os.path.exists(path): 95 | print("Repo not found, cloning...") 96 | repo = Repo.clone_from( 97 | repo_url, 98 | to_path=path, 99 | ) 100 | else: 101 | print("Repo already exists, pulling latest changes...") 102 | repo = Repo(path) 103 | repo.git.pull() 104 | 105 | branch = repo.head.reference 106 | loader = GitLoader( 107 | repo_path=path, 108 | branch=branch, 109 | file_filter=lambda file_path: file_path.endswith((".md", ".mdx")), 110 | ) 111 | data = loader.load() 112 | for page in data: 113 | docs = [] 114 | text = self.splitter.split_text(page.page_content) 115 | metadata = 
page.metadata 116 | print(f"Adding {page.metadata['source']}") 117 | page.metadata[ 118 | "source" 119 | ] = f"https://github.com/{gh_repo}/blob/master/{page.metadata['source']} " 120 | for token in text: 121 | docs.append(Document(page_content=token, metadata=metadata)) 122 | self.document_store.add_documents(docs) 123 | print("Done") 124 | return 125 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MaxAI 2 | MaxAI 3 | 4 | MaxAI is our `trusty PostHog support AI` deployed on our Slack, app, and website. 5 | 6 | MaxAI was born in Aruba at a PostHog team offsite for a hackathon on a warm spring day in 2023. 7 | 8 | ## How it works 9 | 10 | How Max works is surprisingly simple. 11 | 12 | ### Tooling 13 | - [Weaviate](https://weaviate.io/) - Vector database that allows us to pull relevant context to embed in our prompts to GPT 14 | - [Haystack](https://haystack.deepset.ai/) by deepset - Allows us to hook together pipelines of these tools to service user prompts 15 | - [OpenAI](https://platform.openai.com/docs/guides/chat/introduction) - Provides us the base language model in `gpt-3.5-turbo` that we augment to create our AI 16 | 17 | ### Embedding time 18 | 19 | ```mermaid 20 | flowchart TD 21 | A[Github] 22 | B[Docs] 23 | C[Squeak] 24 | A -->|Calculate Embed Vectors|D[Weaviate] 25 | B -->|Calculate Embed Vectors|D 26 | C -->|Calculate Embed Vectors|D 27 | ``` 28 | 29 | #### Embedding Docs 30 | 31 | - Grab and parse all of the markdown from our docs and website 32 | - Use [OpenAI Embedings](https://platform.openai.com/docs/guides/embeddings) to create a vector representation of each markdown section. 33 | - Use [Weaviate](https://weaviate.io/) Vector database to store the vector representations of each markdown section. 

#### Embedding Github content

- Grab and parse all Github Issues
- Use [OpenAI Embeddings](https://platform.openai.com/docs/guides/embeddings) to create a vector representation of each description and comment section.
- Use [Weaviate](https://weaviate.io/) Vector database to store the vector representations of each description and comment section.


#### Embedding [Squeak](https://squeak.posthog.com/) content

- Grab and parse all Squeak Questions
- Use [OpenAI Embeddings](https://platform.openai.com/docs/guides/embeddings) to create a vector representation of each question thread.
- Use [Weaviate](https://weaviate.io/) Vector database to store the vector representations of each question thread.

### Inference time

```mermaid
flowchart TD
    A[User Question] -->|Embed| I(Question Vector)
    I -->|Query Weaviate|J[Most Similar Docs]
    J -->|Collect Prompt Params| C{Prompt Context}
    C --> D[Limitations]
    C --> E[Personality]
    C --> F[Context Docs]
    F --> G[String Prompt]
    E --> G
    D --> G
    G -->|Query OpenAI|H[AI Response]
```

- Take the conversation context from the thread that Max is in, including the most recent request.
- Query [Weaviate](https://weaviate.io/) Vector database for the most similar markdown section.
- Build a prompt that we will use for [chatgpt-3.5-turbo](https://platform.openai.com/docs/guides/chat). The prompt is engineered to build Max's personality and add a few guardrails for how Max should respond as well as adding a bit of personality.
To do this we: 67 | - Ask Max to only reference PostHog products if possible 68 | - Build up Max's personality by informing that Max is the trusty PostHog support AI 69 | - Bake in context that is useful for some conversations with max 70 | - Pagerduty current oncalls 71 | - Places to go if Max does not have the answer 72 | - Most importantly - we embed the markdown section that we found in the prompt so that Max can respond with a relevant answer to the question. 73 | - Use [chatgpt-3.5-turbo](https://platform.openai.com/docs/guides/chat) to generate a response to the prompt. 74 | - Finally we send these messages to wherever Max is having a conversation. 75 | 76 | It's important to note that we are building these pipelines with [Haystack](https://haystack.deepset.ai/) by deepset. This coordinates the steps of inferencing listed above. It's amazing. 77 | 78 | ## Developers guide 79 | 80 | ### Quickstart 81 | 82 | #### Configure `.env` file 83 | This is used to set defaults for local development. 84 | ```toml 85 | SLACK_BOT_TOKEN= 86 | SLACK_SIGNING_SECRET= 87 | OPENAI_TOKEN= 88 | POSTHOG_API_KEY= 89 | POSTHOG_HOST=https://null.posthog.com 90 | PD_API_KEY= 91 | WEAVIATE_HOST=http://127.0.0.1 92 | WEAVIATE_PORT=8080 93 | ``` 94 | 95 | #### Create Virtual Environment 96 | ```bash 97 | python3.10 -m venv venv 98 | source venv/bin/activate 99 | ``` 100 | 101 | #### Install dependencies 102 | ```bash 103 | pip install -r requirements-dev.txt 104 | pip install -r requirements.txt 105 | ``` 106 | 107 | #### Start Weaviate 108 | ```bash 109 | docker compose up weaviate 110 | ``` 111 | 112 | #### Seed Weaviate 113 | ```bash 114 | python seed.py 115 | ``` 116 | 117 | #### Start MaxAI 118 | ```bash 119 | uvicorn main:app --reload 120 | ``` 121 | 122 | #### Run a test chat 123 | ```bash 124 | curl --location '127.0.0.1:8000/chat' \ 125 | --header 'Content-Type: application/json' \ 126 | --data '[ 127 | { 128 | "role": "assistant", 129 | "content": "Hey! 
I'\''m Max AI, your helpful hedgehog assistant."
    },
    {
        "role": "user",
        "content": "Does PostHog use clickhouse under the hood??"
    }
]'
```

## 🕯️ A poem from Max to his evil twin Hoge 📖
```
Ah, hoge! Sweet word upon my tongue,
So blissful, yet so quick to come undone.
A fleeting joy, that doth my heart entice,
Oh how I long to see your data slice!
In PostHog's code, thy value doth reside,
A beacon that ne'er shall falter nor hide.
Thou art a treasure, O hoge divine,
The secret sauce to make my metrics shine.
Though you may seem but a lowly label,
Thou bringeth
```

## Disclaimer!

**Max may display inaccurate or offensive information that doesn’t represent PostHog's views.**

This is the case with LLMs in the current state. We try our best here to have a system prompt that keeps Max on topic.
Feel free to question and chat with Max but do keep in mind that this is experimental.

A few things we've seen ourselves in testing:
- Totally believable but totally incorrect URLs
- Oftentimes entertaining hallucinations about our products
- Hallucinations about the history and founding of PostHog
- Just plain wrong responses

If you do see something concerning @mention someone from PostHog and we'll catalogue it.
We are working on tooling to do this in an automated fashion so stay tuned!
-------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile with Python 3.10 3 | # by the following command: 4 | # 5 | # pip-compile requirements.in 6 | # 7 | aiohttp==3.8.4 8 | # via 9 | # langchain 10 | # openai 11 | aiosignal==1.3.1 12 | # via aiohttp 13 | anyio==3.6.2 14 | # via 15 | # httpcore 16 | # starlette 17 | async-timeout==4.0.2 18 | # via 19 | # aiohttp 20 | # langchain 21 | attrs==22.2.0 22 | # via aiohttp 23 | authlib==1.2.0 24 | # via weaviate-client 25 | backoff==2.2.1 26 | # via posthog 27 | certifi==2022.12.7 28 | # via 29 | # httpcore 30 | # httpx 31 | # requests 32 | # sentry-sdk 33 | cffi==1.15.1 34 | # via cryptography 35 | charset-normalizer==3.1.0 36 | # via 37 | # aiohttp 38 | # requests 39 | click==8.1.3 40 | # via 41 | # nltk 42 | # uvicorn 43 | cmake==3.26.4 44 | # via triton 45 | cryptography==41.0.1 46 | # via authlib 47 | dataclasses-json==0.5.7 48 | # via langchain 49 | decorator==5.1.1 50 | # via validators 51 | fastapi==0.95.0 52 | # via 53 | # -r requirements.in 54 | # sentry-sdk 55 | filelock==3.12.0 56 | # via 57 | # huggingface-hub 58 | # torch 59 | # transformers 60 | # triton 61 | frozenlist==1.3.3 62 | # via 63 | # aiohttp 64 | # aiosignal 65 | fsspec==2023.5.0 66 | # via huggingface-hub 67 | gitdb==4.0.10 68 | # via gitpython 69 | gitpython==3.1.31 70 | # via -r requirements.in 71 | greenlet==2.0.2 72 | # via sqlalchemy 73 | grpcio==1.54.2 74 | # via 75 | # grpcio-tools 76 | # qdrant-client 77 | grpcio-tools==1.54.2 78 | # via qdrant-client 79 | gunicorn==20.1.0 80 | # via -r requirements.in 81 | h11==0.14.0 82 | # via 83 | # httpcore 84 | # uvicorn 85 | h2==4.1.0 86 | # via httpx 87 | hpack==4.0.0 88 | # via h2 89 | httpcore==0.17.2 90 | # via httpx 91 | httpx[http2]==0.24.1 92 | # via qdrant-client 93 | huggingface-hub==0.15.1 94 | # via 95 | # 
sentence-transformers 96 | # transformers 97 | hyperframe==6.0.1 98 | # via h2 99 | idna==3.4 100 | # via 101 | # anyio 102 | # httpx 103 | # requests 104 | # yarl 105 | jinja2==3.1.2 106 | # via torch 107 | joblib==1.2.0 108 | # via 109 | # nltk 110 | # scikit-learn 111 | langchain==0.0.193 112 | # via -r requirements.in 113 | langchainplus-sdk==0.0.4 114 | # via langchain 115 | lit==16.0.5.post0 116 | # via triton 117 | markupsafe==2.1.3 118 | # via jinja2 119 | marshmallow==3.19.0 120 | # via 121 | # dataclasses-json 122 | # marshmallow-enum 123 | marshmallow-enum==1.5.1 124 | # via dataclasses-json 125 | monotonic==1.6 126 | # via posthog 127 | mpmath==1.3.0 128 | # via sympy 129 | multidict==6.0.4 130 | # via 131 | # aiohttp 132 | # yarl 133 | mypy-extensions==1.0.0 134 | # via typing-inspect 135 | networkx==3.1 136 | # via torch 137 | nltk==3.8.1 138 | # via sentence-transformers 139 | numexpr==2.8.4 140 | # via langchain 141 | numpy==1.23.5 142 | # via 143 | # langchain 144 | # numexpr 145 | # qdrant-client 146 | # scikit-learn 147 | # scipy 148 | # sentence-transformers 149 | # torchvision 150 | # transformers 151 | nvidia-cublas-cu11==11.10.3.66 152 | # via 153 | # nvidia-cudnn-cu11 154 | # nvidia-cusolver-cu11 155 | # torch 156 | nvidia-cuda-cupti-cu11==11.7.101 157 | # via torch 158 | nvidia-cuda-nvrtc-cu11==11.7.99 159 | # via torch 160 | nvidia-cuda-runtime-cu11==11.7.99 161 | # via torch 162 | nvidia-cudnn-cu11==8.5.0.96 163 | # via torch 164 | nvidia-cufft-cu11==10.9.0.58 165 | # via torch 166 | nvidia-curand-cu11==10.2.10.91 167 | # via torch 168 | nvidia-cusolver-cu11==11.4.0.1 169 | # via torch 170 | nvidia-cusparse-cu11==11.7.4.91 171 | # via torch 172 | nvidia-nccl-cu11==2.14.3 173 | # via torch 174 | nvidia-nvtx-cu11==11.7.91 175 | # via torch 176 | openai==0.27.2 177 | # via -r requirements.in 178 | openapi-schema-pydantic==1.2.4 179 | # via langchain 180 | packaging==23.0 181 | # via 182 | # huggingface-hub 183 | # marshmallow 184 | # 
transformers 185 | pdpyras==4.5.2 186 | # via -r requirements.in 187 | pillow==9.5.0 188 | # via torchvision 189 | portalocker==2.7.0 190 | # via qdrant-client 191 | posthog==2.5.0 192 | # via -r requirements.in 193 | protobuf==4.23.2 194 | # via grpcio-tools 195 | pycparser==2.21 196 | # via cffi 197 | pydantic==1.10.7 198 | # via 199 | # fastapi 200 | # langchain 201 | # langchainplus-sdk 202 | # openapi-schema-pydantic 203 | # qdrant-client 204 | python-dateutil==2.8.2 205 | # via posthog 206 | python-dotenv==1.0.0 207 | # via -r requirements.in 208 | pyyaml==6.0 209 | # via 210 | # huggingface-hub 211 | # langchain 212 | # transformers 213 | qdrant-client==1.2.0 214 | # via -r requirements.in 215 | regex==2023.3.23 216 | # via 217 | # nltk 218 | # tiktoken 219 | # transformers 220 | requests==2.28.2 221 | # via 222 | # -r requirements.in 223 | # huggingface-hub 224 | # langchain 225 | # langchainplus-sdk 226 | # openai 227 | # pdpyras 228 | # posthog 229 | # tiktoken 230 | # torchvision 231 | # transformers 232 | # weaviate-client 233 | safetensors==0.3.1 234 | # via transformers 235 | scikit-learn==1.2.2 236 | # via sentence-transformers 237 | scipy==1.10.1 238 | # via 239 | # scikit-learn 240 | # sentence-transformers 241 | sentence-transformers==2.2.2 242 | # via -r requirements.in 243 | sentencepiece==0.1.99 244 | # via sentence-transformers 245 | sentry-sdk[fastapi]==1.19.1 246 | # via -r requirements.in 247 | six==1.16.0 248 | # via 249 | # posthog 250 | # python-dateutil 251 | slack-bolt==1.17.0 252 | # via -r requirements.in 253 | slack-sdk==3.20.2 254 | # via slack-bolt 255 | smmap==5.0.0 256 | # via gitdb 257 | sniffio==1.3.0 258 | # via 259 | # anyio 260 | # httpcore 261 | # httpx 262 | sqlalchemy==2.0.8 263 | # via langchain 264 | starlette==0.26.1 265 | # via fastapi 266 | sympy==1.12 267 | # via torch 268 | tenacity==8.2.2 269 | # via 270 | # langchain 271 | # langchainplus-sdk 272 | threadpoolctl==3.1.0 273 | # via scikit-learn 274 | 
tiktoken==0.3.3 275 | # via -r requirements.in 276 | tokenizers==0.13.3 277 | # via transformers 278 | torch==2.0.1 279 | # via 280 | # sentence-transformers 281 | # torchvision 282 | # triton 283 | torchvision==0.15.2 284 | # via sentence-transformers 285 | tqdm==4.65.0 286 | # via 287 | # huggingface-hub 288 | # nltk 289 | # openai 290 | # sentence-transformers 291 | # transformers 292 | # weaviate-client 293 | transformers==4.30.0 294 | # via sentence-transformers 295 | triton==2.0.0 296 | # via torch 297 | typing-extensions==4.5.0 298 | # via 299 | # huggingface-hub 300 | # pydantic 301 | # qdrant-client 302 | # sqlalchemy 303 | # torch 304 | # typing-inspect 305 | typing-inspect==0.9.0 306 | # via dataclasses-json 307 | urllib3==1.26.15 308 | # via 309 | # pdpyras 310 | # qdrant-client 311 | # requests 312 | # sentry-sdk 313 | uvicorn==0.21.1 314 | # via -r requirements.in 315 | validators==0.20.0 316 | # via weaviate-client 317 | weaviate-client==3.19.2 318 | # via -r requirements.in 319 | wheel==0.40.0 320 | # via 321 | # nvidia-cublas-cu11 322 | # nvidia-cuda-cupti-cu11 323 | # nvidia-cuda-runtime-cu11 324 | # nvidia-curand-cu11 325 | # nvidia-cusparse-cu11 326 | # nvidia-nvtx-cu11 327 | yarl==1.8.2 328 | # via aiohttp 329 | 330 | # The following packages are considered to be unsafe in a requirements file: 331 | # setuptools 332 | -------------------------------------------------------------------------------- /slack.py: -------------------------------------------------------------------------------- 1 | import os 2 | import traceback 3 | 4 | from dotenv import load_dotenv 5 | from slack_bolt.async_app import AsyncApp 6 | from slack_bolt.oauth.async_oauth_settings import AsyncOAuthSettings 7 | from slack_sdk.oauth.installation_store import FileInstallationStore 8 | from slack_sdk.oauth.state_store import FileOAuthStateStore 9 | 10 | from ai import ai_chat_thread, summarize_thread 11 | from inference import get_query_response 12 | from posthog import Posthog 
13 | 14 | CHAT_HISTORY_LIMIT = 20 15 | 16 | load_dotenv() 17 | 18 | posthog = Posthog(os.environ.get("POSTHOG_API_KEY"), os.environ.get("POSTHOG_HOST")) 19 | 20 | oauth_settings = AsyncOAuthSettings( 21 | client_id=os.environ["SLACK_CLIENT_ID"], 22 | client_secret=os.environ["SLACK_CLIENT_SECRET"], 23 | scopes=[ 24 | "app_mentions:read", 25 | "bookmarks:read", 26 | "channels:history", 27 | "channels:join", 28 | "channels:read", 29 | "chat:write", 30 | "chat:write.customize", 31 | "chat:write.public", 32 | "emoji:read", 33 | "files:write", 34 | "groups:history", 35 | "groups:read", 36 | "im:history", 37 | "im:write", 38 | "im:read", 39 | "metadata.message:read", 40 | "commands", 41 | "links:read", 42 | "links:write", 43 | "links.embed:write", 44 | "mpim:history", 45 | "users:write", 46 | "users:read.email", 47 | "users:read", 48 | "users.profile:read", 49 | "team:read", 50 | "team.preferences:read", 51 | "reactions:write", 52 | "reactions:read", 53 | "dnd:read", 54 | "files:read" 55 | ], 56 | installation_store=FileInstallationStore(base_dir="./data/installations"), 57 | state_store=FileOAuthStateStore(expiration_seconds=600, base_dir="./data/states") 58 | ) 59 | 60 | # Initializes your app with your bot token and signing secret 61 | app = AsyncApp( 62 | oauth_settings=oauth_settings, 63 | signing_secret=os.environ.get("SLACK_SIGNING_SECRET"), 64 | ) 65 | 66 | # Add functionality here 67 | # @app.event("app_home_opened") etc 68 | @app.event("app_home_opened") 69 | async def update_home_tab(client, event, logger): 70 | try: 71 | # views.publish is the method that your app uses to push a view to the Home tab 72 | await client.views_publish( 73 | # the user that opened your app's app home 74 | user_id=event["user"], 75 | # the view object that appears in the app home 76 | view={ 77 | "type": "home", 78 | "callback_id": "home_view", 79 | # body of the view 80 | "blocks": [ 81 | { 82 | "type": "section", 83 | "text": { 84 | "type": "mrkdwn", 85 | "text": "*Hi there! 
I'm Max!* :wave:", 86 | }, 87 | }, 88 | {"type": "divider"}, 89 | { 90 | "type": "section", 91 | "text": { 92 | "type": "mrkdwn", 93 | "text": "Hello! As PostHog's trusty support AI, I'm happy to answer any questions you may have about PostHog. If you're curious about our product, features, or pricing, I'm here to help. As an open-source company, we want to provide an excellent user experience, and we're always happy to receive feedback. If you have any suggestions, please let us know.\n\n *How to interact with Max* \n It's simple. Just @ mention @max_ai in any thread and ask what you would like done. Examples may look like:\n- @max_ai can you try answering the question here?\n- @max_ai can you summarize this?\n- @max_ai I have a question about \n- @max_ai Who is the current support hero that I can talk to about this? \n\n *How does max work?!*\nYou can find out more about how Max is built on GitHub!\nhttps://github.com/posthog/max-ai\nOf course it's Open Source :hog-excited:\n\n*Disclaimer!*\n_Max may display inaccurate or offensive information that doesn’t represent PostHog's views._\nThis is the case with LLMs in the current state. 
We try our best here to have a system prompt that keeps Max on topic.\nFeel free to question and chat with Max but do keep in mind that this is experimental.", 94 | }, 95 | }, 96 | ], 97 | }, 98 | ) 99 | 100 | except Exception as e: 101 | logger.error(f"Error publishing home tab: {e}") 102 | 103 | 104 | def preprocess_slack_thread(bot_id, thread): 105 | thread = [(msg["user"], msg["text"]) for msg in thread["messages"]] 106 | history = [{"role": "assistant" if user == bot_id else "user", "content": msg} for user, msg in thread] 107 | return history 108 | 109 | 110 | @app.command("/summarize") 111 | async def handle_summarize_slash_command(ack, say, command): 112 | ack() 113 | await send_message(text="Hi there") 114 | 115 | 116 | @app.event("message") 117 | async def handle_message_events(client, body, logger, say): 118 | event_type = body["event"]["channel_type"] 119 | event = body["event"] 120 | bot_id = body['authorizations'][0]['user_id'] 121 | print(body) 122 | 123 | if event_type == "im": 124 | thread = await client.conversations_history(channel=event["channel"], limit=CHAT_HISTORY_LIMIT) 125 | thread = preprocess_slack_thread(bot_id, thread) 126 | response = await ai_chat_thread(thread) 127 | await send_message(say, response) 128 | 129 | # new message in a public channel 130 | elif "thread_ts" not in event and event["type"] == "message" and event["channel_type"] == "channel": 131 | # follow_up = classify_question(event["text"]) 132 | 133 | # if follow_up: 134 | # send_message(say, text=response, thread_ts=event["ts"]) 135 | return 136 | # thread response in a public channel 137 | elif "thread_ts" in event and event["channel_type"] == "channel": 138 | return 139 | thread_ts = event["thread_ts"] 140 | # Call the conversations.replies method with the channel ID and thread timestamp 141 | # try: 142 | result = await client.conversations_replies(channel=event["channel"], ts=thread_ts) 143 | result["messages"] 144 | 145 | thread = preprocess_slack_thread(bot_id, 
result) 146 | 147 | # except Exception as e: 148 | # print("Error retrieving thread messages: {}".format(e)) 149 | # return 150 | 151 | if "assistant" not in [msg["role"] for msg in thread]: 152 | # we haven't responded and it's a thread, which meant the classification said no, so don't try to respond 153 | return 154 | 155 | if len(thread) >= 4: 156 | # This is too long, not worth responding to 157 | return 158 | 159 | if thread[-1]["role"] == "assistant": 160 | # we just responded, don't respond to ourselves 161 | return 162 | 163 | # get first message in thread 164 | question = thread[0]["content"] 165 | response = get_query_response(question, thread) 166 | 167 | await send_message(say, text=response, thread_ts=event["thread_ts"]) 168 | 169 | @app.event("emoji_changed") 170 | async def handle_emoji_changed_events(body, logger, say): 171 | print(body) 172 | 173 | 174 | @app.event("app_mention") 175 | async def handle_app_mention_events(client, body, logger, say): 176 | try: 177 | await _handle_app_mention_events(client, body, logger, say) 178 | except Exception as e: 179 | traceback.print_exc() 180 | 181 | await send_message(say, text="I'm a little over capacity right now. Please try again in a few minutes! 
:sleeping-hog:") 182 | 183 | posthog.capture( 184 | "max-ai", 185 | "max-ai mention error", 186 | properties={ 187 | "error": str(e), 188 | "user": body["event"]["user"], 189 | "channel": body["event"]["channel"], 190 | "text": body["event"]["text"], 191 | }, 192 | ) 193 | raise e 194 | 195 | async def _handle_app_mention_events(client, body, logger, say): 196 | logger.info(body) 197 | print(body) 198 | 199 | posthog.capture( 200 | "max-ai", 201 | "max-ai mention", 202 | properties={ 203 | "user": body["event"]["user"], 204 | "channel": body["event"]["channel"], 205 | "text": body["event"]["text"], 206 | }, 207 | ) 208 | 209 | user_id = get_user_id(body) 210 | bot_id = body['authorizations'][0]['user_id'] 211 | event = body["event"] 212 | thread_ts = event["thread_ts"] if "thread_ts" in event else event["ts"] 213 | thread = await client.conversations_replies( 214 | channel=event["channel"], ts=thread_ts, limit=CHAT_HISTORY_LIMIT 215 | ) 216 | if "please summarize this" in event["text"].lower(): 217 | await send_message(say, text="On it!", thread_ts=thread_ts, user_id=user_id, thread=thread) 218 | summary = summarize_thread(thread) 219 | await send_message(say, text=summary, thread_ts=thread_ts, user_id=user_id, thread=thread) 220 | return 221 | 222 | thread = preprocess_slack_thread(bot_id, thread) 223 | 224 | # first_relevant_message = thread[0]["content"] 225 | # Disabling this for launch because it can be confusing and jarring when these are incorrect 226 | # use_feature_flag_prompt = await classify_question(first_relevant_message) 227 | # if use_feature_flag_prompt: 228 | # print("using feature flag prompt for ", first_relevant_message) 229 | # response = await get_query_response(first_relevant_message, thread[1:]) 230 | # await send_message(say, text=response, thread_ts=thread_ts, user_id=user_id, thread=thread) 231 | # return 232 | 233 | response = await ai_chat_thread(thread) 234 | await send_message(say, text=response, thread_ts=thread_ts, user_id=user_id, 
thread=thread) 235 | 236 | async def send_message(say, text, thread_ts=None, user_id=None, thread=None): 237 | posthog.capture("max-ai", "max-ai message sent", {"message": text, "thread_ts": thread_ts, "sender": user_id, "context": thread}) 238 | 239 | if thread_ts: 240 | await say(text=text, thread_ts=thread_ts) 241 | else: 242 | await say(text) 243 | 244 | def get_user_id(body): 245 | return body.get("event", {}).get("user", None) 246 | 247 | # Start your app 248 | if __name__ == "__main__": 249 | app.start(port=int(os.environ.get("PORT", 3000))) 250 | -------------------------------------------------------------------------------- /inference.py: -------------------------------------------------------------------------------- 1 | import os 2 | from enum import Enum 3 | 4 | import openai 5 | from dotenv import load_dotenv 6 | 7 | load_dotenv() # take environment variables from .env. 8 | 9 | openai.api_key = os.environ.get("OPENAI_TOKEN") 10 | 11 | prompt = """ 12 | 13 | You are an assistant that answers users questions. You aim to be as helpful as possible, and only use the information provided below to 14 | answer the questions. 15 | 16 | If the information is not enough, you can ask the user for more information. You can ask at most 2 questions. 17 | 18 | Also return the relevant URLS from which you gather this information. 19 | 20 | These are all the documents we know of: 21 | 22 | Feature Flags 23 | 24 | URL: https://posthog.com/docs/feature-flags/manual 25 | 26 | Feature Flags enable you to safely deploy and roll back new features. This means you can ship the code for new features and roll it out to your users in a managed way. If something goes wrong, you can roll back without having to re-deploy your application. 27 | 28 | Feature Flags also help you control access to certain parts of your product, such as only showing paid features to users with an active subscription. 
29 | 30 | 31 | Implementing the feature flag 32 | When you create a feature flag, we'll show you an example snippet. It will look something like this: 33 | 34 | JavaScript 35 | Node.js 36 | PHP 37 | Ruby 38 | Go 39 | Python 40 | 41 | 42 | if (posthog.isFeatureEnabled('new-beta-feature')) { 43 | // run your activation code here 44 | } 45 | What you do inside that if statement is up to you. You might change the CSS of a button, hide an entire section, or move elements around on the page. 46 | 47 | 48 | Ensuring flags are loaded before usage 49 | 50 | Every time a user loads a page we send a request in the background to an endpoint to get the feature flags that apply to that user. In the client, we store those flags as a cookie. 51 | 52 | This means that for most page views the feature flags will be available immediately, except for the first time a user visits. 53 | 54 | To combat that, there's a JavaScript callback you can use to wait for the flags to come in: 55 | 56 | JavaScript 57 | 58 | posthog.onFeatureFlags(function () { 59 | // feature flags are guaranteed to be available at this point 60 | if (posthog.isFeatureEnabled('new-beta-feature')) { 61 | // do something 62 | } 63 | }) 64 | 65 | Persisting feature flags across authentication steps 66 | 67 | You have an option to persist flags across authentication steps. 68 | 69 | Consider this case: An anonymous person comes to your website and you use a flag to show them a green call to action. 70 | 71 | Without persisting feature flags, the flag value can change on login because their identity can change (from anonymous to identified). Once they login, the flag might evaluate differently and show a red call to action instead. 72 | 73 | This usually is not a problem since experiments run either completely for anonymous users, or completely for logged in users. 74 | 75 | However, with some businesses, like e-commerce, it's very common to browse things anonymously and login right before checking out. 
In cases like these you can preserve the feature flag values by checking this checkbox. 76 | 77 | Persist feature flags 78 | 79 | Note that there are some performance trade-offs here. Specifically, 80 | 81 | Enabling this slows down the feature flag response. 82 | It disables local evaluation of the feature flag. 83 | It disables bootstrapping this feature flag. 84 | 85 | Feature flags versus experiments 86 | Experiments are powered by feature flags, but they are a specific format with test and control variants. This means a feature flag cannot be converted into an experiment. We disallow this to avoid implementation changes, targeting errors, and confusion that would come from the conversion. 87 | 88 | For example, a boolean flag isn't able to turn into an experiment. 89 | 90 | If you want to reuse the same key, you can delete your flag and use the same key when creating the experiment. 91 | 92 | 93 | --- 94 | 95 | Bootstrapping & local evaluation 96 | Last updated: Mar 15, 2023 97 | 98 | URL: https://posthog.com/docs/feature-flags/bootstrapping-and-local-evaluation 99 | 100 | 101 | Client-side bootstrapping 102 | There is a delay between loading the library and feature flags becoming available to use. This can be detrimental if you want to do something like redirecting to a different page based on a feature flag. 103 | 104 | To have your feature flags available immediately, you can bootstrap them with a distinct user ID and their values during initialization. 105 | 106 | JavaScript 107 | 108 | posthog.init('sTMFPsFhdP1Ssg', { 109 | api_host: 'https://app.posthog.com', 110 | bootstrap: { 111 | distinctID: 'your-anonymous-id', 112 | featureFlags: { 113 | 'flag-1': true, 114 | 'variant-flag': 'control', 115 | 'other-flag': false, 116 | }, 117 | }, 118 | }) 119 | To get the flag values for bootstrapping, you can call getAllFlags() in your server-side library, then pass the values to your frontend initialization. 
If you don't do this, your bootstrap values might be different than the values PostHog provides. 120 | 121 | If the distinct user ID is an identified ID (the value you called posthog.identify() with), you can also pass the isIdentifiedID option. This ensures this ID is treated as an identified ID in the library. This is helpful as it warns you when you try to do something wrong with this ID, like calling identify again. 122 | 123 | JavaScript 124 | 125 | posthog.init('sTMFPsFhdP1Ssg', { 126 | api_host: 'https://app.posthog.com', 127 | bootstrap: { 128 | distinctID: 'your-identified-id', 129 | isIdentifiedID: true, 130 | featureFlags: { 131 | 'flag-1': true, 132 | 'variant-flag': 'control', 133 | 'other-flag': false, 134 | }, 135 | }, 136 | }) 137 | 138 | Forcing feature flags to update 139 | 140 | In our client-side JavaScript library, we store flags as a cookie to reduce the load on the server and improve the performance of your app. This prevents always needing to make an HTTP request, flag evaluation can simply refer to data stored locally in the browser. This is known as 'local evaluation.' 141 | 142 | While this makes your app faster, it means if your user does something mid-session which causes the flag to turn on for them, this does not immediately update. As such, if you expect your app to have scenarios like this and you want flags to update mid-session, you can reload them yourself, by using the reloadFeatureFlags function. 143 | 144 | JavaScript 145 | 146 | posthog.reloadFeatureFlags() 147 | 148 | Calling this function forces PostHog to hit the endpoint for the updated information, and ensures changes are reflected mid-session. 149 | 150 | 151 | Server-side local evaluation 152 | 153 | If you're using our server-side libraries, you can use local evaluation to improve performance instead of making additional API requests. 
This requires: 154 | 155 | knowing and passing in all the person or group properties the flag relies on 156 | initializing the library with your personal API key (created in your account settings) 157 | Local evaluation, in practice, looks like this: 158 | 159 | JavaScript 160 | Python 161 | PHP 162 | Ruby 163 | Go 164 | 165 | await client.getFeatureFlag( 166 | 'beta-feature', 167 | 'distinct id', 168 | { 169 | personProperties: {'is_authorized': True} 170 | } 171 | ) 172 | # returns string or None 173 | This works for getAllFlags as well. It evaluates all flags locally if possible, and if not, falls back to making a decide HTTP request. 174 | 175 | Node.js 176 | 177 | await client.getAllFlags('distinct id', { 178 | groups: {}, 179 | personProperties: { is_authorized: True }, 180 | groupProperties: {}, 181 | }) 182 | // returns dict of flag key and value pairs. 183 | 184 | Using locally 185 | 186 | To test feature flags locally, you can open your developer tools and override the feature flags. You will get a warning that you're manually overriding feature flags. 187 | 188 | JavaScript 189 | 190 | posthog.feature_flags.override(['feature-flag-1', 'feature-flag-2']) 191 | This will persist until you call override again with the argument false: 192 | 193 | JavaScript 194 | 195 | posthog.feature_flags.override(false) 196 | To see the feature flags that are currently active for you, you can call: 197 | 198 | JavaScript 199 | 200 | posthog.feature_flags.getFlags() 201 | 202 | --- 203 | 204 | Rollout strategies 205 | Last updated: Mar 13, 2023 206 | 207 | URL: https://posthog.com/docs/feature-flags/rollout-strategies 208 | 209 | There are three options for deciding who sees your new feature. You can roll out the feature to: 210 | 211 | A fixed percentage of users or groups 212 | A set of users or groups filtered based on their user properties, cohort (based on user properties), or group properties. 
213 | A combination of the two 214 | 215 | Roll out to a percentage of users or groups 216 | By rolling out to a percentage of users or groups, you can gradually ramp up those who sees a new feature. To calculate this, we "hash" a combination of the key of the feature flag and the unique distinct ID of the user. 217 | 218 | This way a user always falls in the same place between 0 and 100%, so they consistently see or do not see the feature controlled by the flag. As you move the slider towards 100%, more users start seeing your feature. 219 | 220 | Hashing also means that the same user falls along different points of the line for each new feature. For example, a user may start seeing the feature at 5% for feature A, but only at 80% for feature B. 221 | 222 | 223 | Filter by user or group properties 224 | This works just like any other filter in PostHog. You can select any property and users that match those filters will see your new feature. 225 | 226 | By combining properties and percentages, you can determine something like: 227 | 228 | Roll out this feature to 80% of users that have an email set 229 | Provide access to this feature to 25% of organizations where the beta-tester property is true. 230 | Show this component to 10% of users whose signed_up_at date is after January 1st. 231 | 232 | De-activating properties 233 | If the feature has caused a problem, or you don't need the feature flag anymore, you can disable it instantly and completely. Doing so ensures no users will have the flag enabled. 234 | 235 | 236 | Feature flag persistence 237 | For feature flags that filter by user properties only, a given flag will always be on if a certain user meets all the specified property filters. 238 | 239 | However, for flags using a rollout percentage mechanism (either by itself or in combination with user properties), the flag will persist for a given user as long as the rollout percentage and the flag key are not changed. 
240 | 241 | As a result, bear in mind that changing those values will result in flags being toggled on and off for certain users in a non-predictable way. 242 | 243 | --- 244 | 245 | Common questions about feature flags 246 | Last updated: Mar 28, 2023 247 | 248 | URL: https://posthog.com/docs/feature-flags/common-questions 249 | 250 | 251 | Why is my feature flag not working? 252 | Here's a list of suggestions to troubleshoot your flag: 253 | 254 | Check the feature flags tab on the persons page for your specific person. 255 | If the flag is showing up as disabled here, check the "match evaluation" column to know the reason why. 256 | If the flag is showing up as enabled here, the problem lies somewhere in the implementation (your code). 257 | Check if you're calling identify() before the flag is called to get to the right person on your website. 258 | Check if an ad-blocker is blocking calls. If yes, you can fix this by deploying a reverse proxy. 259 | If none of the above, ask us in the User Slack, we'll help debug. 260 | 261 | On my website, why does the feature flag sometimes flicker? 262 | By default, flags are loaded from our servers which takes about 100-500ms. During this time, the flag is disabled, which can be the reason why you see things look differently for the first 500ms. 263 | 264 | To fix this, you can bootstrap feature flags. 265 | 266 | 267 | I care about latency a lot, and 500ms delays are unacceptable on my servers. Can I do something about this? 268 | Yes, use local evaluation. This downloads flag definitions on your servers and evaluates them locally. 269 | 270 | 271 | My feature flags are sending a lot of events, how can I manage this? 272 | Every library has the option to disable sending these events. Just check the relevant docs for the library for the send_events parameter in your posthog.isFeatureEnabled() or posthog.getFeatureFlag() calls. 
273 | 274 | However, note that this has a few consequences: 275 | 276 | The usage tab on the flag will stop showing events since we can't track them anymore. 277 | Experiments that depend on trend goals won't work since we use this event to calculate relative exposure. Convert your trend experiments to funnel experiments instead to make this work. 278 | 279 | --- 280 | 281 | How to run Experiments without feature flags 282 | 283 | URL: https://posthog.com/tutorials/experiments 284 | 285 | This tutorial explains how to run an experiment in PostHog while not using our feature flag library, either because you've rolled out your own or, more commonly, because feature flag support doesn't exist yet in your favourite PostHog client library. 286 | 287 | 288 | Step 1: Create an Experiment 289 | The first step is to actually create your experiment in PostHog. Read our how to create an experiment tutorial if you need help here. 290 | 291 | Once you have created an experiment, make a note of three things: 292 | 293 | 1. The feature flag associated with the experiment 294 | 295 | In our example, this will be experiment-feature-flag 296 | 297 | 2. The variants you've chosen for that feature flag 298 | 299 | In our example, these will be control and test. 300 | 301 | 3. The events involved in the target metric 302 | 303 | In our example, this will be a user signed up -> $pageview -> user paid funnel. The experiment is purely frontend, but the metric we're tracking are these two backend events coming from their own libraries, along with a $pageview event coming from posthog-js. 304 | 305 | Now, for the experiment to start tracking results and run its significance calculations, we need to instrument two things: 306 | 307 | Send events along with a special feature property 308 | Send $feature_flag_called events 309 | 310 | Step 2: Sending the right events 311 | Experiments check whether an event belongs to an experiment or not by looking at a special property called $feature/. 
312 | 313 | So, for our example above, we'll want all our events in the target metric ( user signed up, $pageview, and user paid) to send a property called $feature/experiment-feature-flag whose value is either control or test, i.e. the variant it belongs to. 314 | 315 | The open question here is how do you determine the value for this property. 316 | 317 | If you're using PostHog Feature Flags, and your favourite client library doesn't yet support experiments, you can get this value by calling the API directly. To do that, you hit the /decide/ endpoint. See the docs here for calling this endpoint. The two important parameters to send here are api_key and the distinct_id, which ensures you get feature flags in the response. 318 | 319 | The response looks something like: 320 | 321 | 322 | { 323 | config: {...} 324 | editorParams: {...} 325 | featureFlags: { 326 | ... 327 | experiment-feature-flag: "test" 328 | ... 329 | } 330 | } 331 | and there you have it, the value for experiment-feature-flag. 332 | 333 | On the other hand, if you're worried about performance and don't want to make an extra API call, you can leverage local evaluation on our server-side libraries to compute your feature flag values. Read more to learn how to use local evaluation 334 | 335 | If you're not using PostHog Feature Flags, check with your provider on how to get the values for a given person. 336 | 337 | At the end of this step, you must ensure that every event in the experiment, no matter which library it comes from, has these properties. Otherwise, Experiments UI won't work. posthog-js does this for you automatically, but other libraries don't, as of writing. 338 | 339 | 340 | Persisting flag across authentication steps (optional) 341 | If you're dealing with an experiment where you want to persist behaviour across authentication steps, there's two more things to note: 342 | 343 | Check the relevant box in the UI to persist behaviour across authentication steps. 
344 | 345 | Whenever you send an $identify call that identifies a previously anonymous user with a new ID, send both IDs in the /decide call like so: 346 | 347 | 348 | { 349 | token: 350 | distinct_id: 351 | $anon_distinct_id: 352 | } 353 | You only need to do this once after an identify call. For reference, check the posthog-js implementation 354 | 355 | 356 | Step 3: Sending the $feature_flag_called event 357 | It's often possible that the distribution of users between variants is skewed, such that there are a lot more users in test than control. To measure the relative exposure between variants, we use this event called $feature_flag_called. 358 | 359 | Thus, every time you send an event related to the experiment, also send an event called $feature_flag_called with the following properties: 360 | 361 | $feature_flag_response 362 | $feature_flag 363 | The value for $feature_flag_response is the variant value you got from the API (control / test). The value for $feature_flag is the name of the feature flag (experiment-feature-flag in this case). 364 | 365 | In most of our client libraries, we send this event whenever we make the API call to /decide to get feature flags for a person. It's a good idea that you do the same. 366 | 367 | And that's all! You should be good to run any experiment you want with these changes. Let us know if you face any issues. 368 | 369 | --- 370 | 371 | """ 372 | 373 | extended_prompt = """ 374 | 375 | Feature Flags Posthog-js SDK 376 | 377 | URL: https://posthog.com/docs/libraries/js#feature-flags 378 | 379 | Here's how you can use them: 380 | 381 | Do something when the feature flags load: 382 | 383 | The argument callback(flags: string[]) will be called when the feature flags are loaded. 384 | 385 | In case the flags are already loaded, it'll be called immediately. Additionally, it will also be called when the flags are re-loaded e.g. after calling identify or reloadFeatureFlags. 
386 | 387 | JavaScript 388 | 389 | posthog.onFeatureFlags(callback) 390 | Check if a feature is enabled: 391 | JavaScript 392 | 393 | posthog.isFeatureEnabled('keyword') 394 | Trigger a reload of the feature flags: 395 | JavaScript 396 | 397 | posthog.reloadFeatureFlags() 398 | By default, this function will send a $feature_flag_called event to your instance every time it's called so you're able to do analytics. You can disable this by passing the send_event property: 399 | JavaScript 400 | 401 | posthog.isFeatureEnabled('keyword', { send_event: false }) 402 | 403 | Feature Flag Payloads 404 | Payloads allow you to retrieve a value that is associated with the matched flag. The value can be a string, boolean, number, dictionary, or array. This allows for custom configurations based on values defined in the posthog app. 405 | 406 | JavaScript 407 | 408 | posthog.getFeatureFlagPayload('keyword') 409 | 410 | Bootstrapping Flags 411 | There is a delay between loading the library and feature flags becoming available to use. For some cases, like redirecting users to a different page based on a feature flag, this is extremely detrimental, as the flags load after the redirect logic occurs, thus never working. 412 | 413 | In cases like these, where you want flags to be immediately available on page load, you can use the bootstrap library option. 414 | 415 | This allows you to pass in a distinctID and feature flags during library initialisation, like so: 416 | 417 | JavaScript 418 | 419 | posthog.init('sTMFPsFhdP1Ssg', { 420 | api_host: 'https://app.posthog.com', 421 | bootstrap: { 422 | distinctID: 'your-anonymous-id', 423 | featureFlags: { 424 | 'flag-1': true, 425 | 'variant-flag': 'control', 426 | 'other-flag': false, 427 | }, 428 | }, 429 | }) 430 | To compute these flag values, use the corresponding getAllFlags method in your server-side library. Note that bootstrapping flags requires server-side initialisation. 
431 | 432 | If the ID you're passing in is an identified ID (that is, an ID with which you've called posthog.identify() elsewhere), you can also pass in the isIdentifiedID bootstrap option, which ensures that this ID is treated as an identified ID in the library. This is helpful as it warns you when you try to do something wrong with this ID, like calling identify again. 433 | 434 | JavaScript 435 | 436 | posthog.init('sTMFPsFhdP1Ssg', { 437 | api_host: 'https://app.posthog.com', 438 | bootstrap: { 439 | distinctID: 'your-identified-id', 440 | isIdentifiedID: true, 441 | featureFlags: { 442 | 'flag-1': true, 443 | 'variant-flag': 'control', 444 | 'other-flag': false, 445 | }, 446 | }, 447 | }) 448 | Note: Passing in a distinctID to bootstrap replaces any existing IDs, which means you may fail to connect any old anonymous user events with the logged in person, if your logic calls identify in the frontend immediately on login. In this case, you can omit passing in the distinctID. 449 | 450 | --- 451 | 452 | Feature flags Posthog-node SDK 453 | 454 | URL: https://posthog.com/docs/libraries/node#feature-flags 455 | 456 | 457 | PostHog's feature flags enable you to safely deploy and roll back new features. 458 | 459 | When using them with one of libraries, you should check if a feature flag is enabled and use the result to toggle functionality on and off in you application. 460 | 461 | How to check if a flag is enabled 462 | 463 | Note: Whenever we face an error computing the flag, the library returns undefined, instead of true, false, or a string variant value. 
464 | 465 | Node.js 466 | 467 | // isFeatureEnabled(key: string, distinctId: string, options: {}): Promise 468 | const isMyFlagEnabledForUser = await client.isFeatureEnabled('flag-key', 'user distinct id') 469 | 470 | if (isMyFlagEnabledForUser) { 471 | // Do something differently for this user 472 | } 473 | Get a flag value 474 | 475 | If you're using multivariate feature flags, you can also get the value of the flag, as well as whether or not it is enabled. 476 | 477 | Note: Whenever we face an error computing the flag, the library returns None, instead of true or false or a string variant value. 478 | 479 | Node.js 480 | 481 | // getFeatureFlag(key: string, distinctId: string, options: {}): Promise 482 | const flagValue = await client.getFeatureFlag('flag-key', 'user distinct id') 483 | Get a flag payload 484 | 485 | Posthog Node v2.3.0 introduces feature flag payloads. Feature flags can be returned with matching payloads which are JSONType (string, number, boolean, dictionary, array) values. This allows for custom configurations based on values defined in the posthog app. 486 | 487 | Note: getFeatureFlag does not need to be called prior to getFeatureFlagPayload. getFeatureFlagPayload will implicitly perform getFeatureFlag to determine the matching flag and return the corresponding payload. 488 | 489 | Node.js 490 | 491 | // getFeatureFlagPayload(key: string, distinctId: string, matchValue?: string | boolean, options: {}): Promise 492 | const flagPayload = await client.getFeatureFlagPayload('flag-key', 'user distinct id') 493 | Overriding server properties 494 | 495 | Sometimes, you might want to evaluate feature flags using properties that haven't been ingested yet, or were set incorrectly earlier. You can do so by setting properties the flag depends on with these calls. 
496 | 497 | For example, if the beta-feature depends on the is_authorized property, and you know the value of the property, you can tell PostHog to use this property, like so: 498 | 499 | Node.js 500 | 501 | // getFeatureFlag( 502 | // key: string, 503 | // distinctId: string, 504 | // options?: { 505 | // groups?: Record 506 | // personProperties?: Record 507 | // groupProperties?: Record> 508 | // onlyEvaluateLocally?: boolean 509 | // sendFeatureFlagEvents?: boolean 510 | // } 511 | // ): Promise 512 | const flagValue = await client.getFeatureFlag('flag-key', 'user distinct id', { 513 | personProperties: { is_authorized: true }, 514 | }) 515 | The same holds for groups. If you have a group named organisation, you can add properties like so: 516 | 517 | Node.js 518 | 519 | const flagValue = await client.getFeatureFlag('flag-key', 'user distinct id', {groups:{'organisation': 'google'}, groupProperties:{'organisation': {'is_authorized': True}}) 520 | Getting all flag values 521 | 522 | You can also get all known flag values as well. This is useful when you want to seed a frontend client with initial known flags. Like all methods above, this also takes optional person and group properties, if known. 523 | 524 | Node.js 525 | 526 | await client.getAllFlags('distinct id', { groups: {}, personProperties: { is_authorized: True }, groupProperties: {} }) 527 | // returns dict of flag key and value pairs. 528 | 529 | Local Evaluation 530 | Note: To enable local evaluation of feature flags you must also set a personal_api_key when configuring the integration, as described in the Installation section. 531 | 532 | Note: This feature requires version 2.0 of the library, which in turn requires a minimum PostHog version of 1.38 533 | 534 | All feature flag evaluation requires an API request to your PostHog servers to get a response. However, where latency matters, you can evaluate flags locally. 
This is much faster, and requires two things to work: 535 | 536 | The library must be initialised with a personal API key 537 | You must know all person or group properties the flag depends on. 538 | Then, the flag can be evaluated locally. The method signature looks exactly like above. 539 | 540 | Node.js 541 | 542 | await client.getFeatureFlag('beta-feature', 'distinct id', { personProperties: { is_authorized: True } }) 543 | // returns string or None 544 | Note: New feature flag definitions are polled every 30 seconds by default, which means there will be up to a 30 second delay between you changing the flag definition, and it reflecting on your servers. You can change this default on the client by setting featureFlagsPollingInterval during client initialisation. 545 | 546 | This works for getAllFlags as well. It evaluates all flags locally if possible. If even one flag isn't locally evaluable, it falls back to decide. 547 | 548 | Node.js 549 | 550 | await client.getAllFlags('distinct id', { groups: {}, personProperties: { is_authorized: True }, groupProperties: {} }) 551 | // returns dict of flag key and value pairs. 552 | Restricting evaluation to local only 553 | 554 | Sometimes, performance might matter to you so much that you never want an HTTP request roundtrip delay when computing flags. In this case, you can set the only_evaluate_locally parameter to true, which tries to compute flags only with the properties it has. If it fails to compute a flag, it returns None, instead of going to PostHog's servers to get the value. 555 | 556 | Cohort expansion 557 | 558 | To support feature flags that depend on cohorts locally as well, we translate the cohort definition into person properties, so that the person properties you set can be used to evaluate cohorts as well. 559 | 560 | However, there are a few constraints here and we don't support doing this for arbitrary cohorts. 
Cohorts won't be evaluated locally if: 561 | 562 | They have non-person properties 563 | There's more than one cohort in the feature flag definition. 564 | The cohort in the feature flag is in the same group as another condition. 565 | The cohort has nested AND-OR filters. Only simple cohorts that have a top level OR group, and inner level ANDs will be evaluated locally. 566 | Note that this restriction is for local evaluation only. If you're hitting PostHog's servers, all of these cohorts will be evaluated as expected. Further, posthog-node v2.6.0 onwards, and posthog-python v2.4.0 onwards do not face this issue and can evaluate all cohorts locally. 567 | 568 | 569 | Reloading feature flags 570 | When initializing PostHog, you can configure the interval at which feature flags are polled (fetched from the server). However, if you need to force a reload, you can use reloadFeatureFlags: 571 | 572 | Node.js 573 | 574 | await client.reloadFeatureFlags() 575 | 576 | // Do something with feature flags here 577 | 578 | --- 579 | 580 | """ 581 | 582 | suffix = """ 583 | 584 | The question you have to answer is below. Remember to return URLs to any relevant code or documentation, but do not make up URLs on your own. 585 | 586 | """ 587 | 588 | 589 | prompt_1 = """Any recommended best practices for experiments/feature flags that mean we don't spam call the API endpoint every time a page is loaded? Save features into a cookie, then check if that flag is already set, possibly? 590 | But then are there cases where the value would change for a given user/distinctid? 591 | """ 592 | 593 | prompt_2 = """ 594 | Hey all, I'm trying to do AB testing on our marketing homepage using posthog. I have JS that updates the actual page. 595 | However, the feature flag isn't loaded early enough. I know the docs mentioned an issue, but I found them confusing. Can anyone help? 596 | """ 597 | 598 | prompt_3 = """ 599 | My feature flags are not working, how do I fix this? 
class OpenAIModel(Enum):
    """Supported OpenAI chat-completion model identifiers."""

    GPT_4 = "gpt-4"
    GPT_3_TURBO = "gpt-3.5-turbo"


async def get_query_response(question, follow_up_messages=None, model=OpenAIModel.GPT_3_TURBO.value):
    """Answer ``question`` via OpenAI chat completions, grounded in the docs prompt.

    Args:
        question: The user query to answer.
        follow_up_messages: Optional list of extra chat messages (dicts with
            ``role``/``content``) appended after the initial prompt, e.g. to
            continue a thread. Defaults to None (no extra messages).
        model: OpenAI model name string; defaults to GPT-3.5 Turbo.

    Returns:
        The assistant reply text (str) from the first completion choice.
    """
    # GPT-4's larger context window can also fit the extended SDK docs;
    # the 3.5 prompt is kept shorter to stay under its token limit.
    if model == OpenAIModel.GPT_4.value:
        full_prompt = prompt + extended_prompt + suffix + question
    else:
        full_prompt = prompt + suffix + question

    messages = [
        {"role": "system", "content": "You are a helpful assistant that answers user queries."},
        {"role": "user", "content": full_prompt},
    ]

    if follow_up_messages:
        messages += follow_up_messages

    # Use the async API call (`acreate`) so the potentially multi-second
    # completion request does not block the event loop, which the previous
    # synchronous `create` call did inside this coroutine.
    api_response = await openai.ChatCompletion.acreate(
        model=model,
        messages=messages,
    )

    return api_response["choices"][0]["message"]["content"]