├── helpers ├── commands │ ├── __init__.py │ ├── setup.py │ └── news.py ├── functions │ ├── discord_verify.py │ ├── discord_request.py │ ├── count_citations.py │ ├── data_utils.py │ ├── category_utils.py │ ├── progress_tracker.py │ ├── renumber_citations.py │ ├── llm_runner.py │ ├── openai_runner.py │ ├── send_profile_to_db.py │ ├── llm_summary.py │ ├── api_utils.py │ ├── process_citations.py │ ├── discord_updates.py │ └── format_data.py └── config │ └── llm_schemas.py ├── images ├── secrets_modal.png ├── example_discord.png └── sources_tech_bot_discord.png ├── requirements.txt ├── .gitignore ├── app.py └── README.md /helpers/commands/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /images/secrets_modal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ilsilfverskiold/ai-personalized-tech-reports-discord/HEAD/images/secrets_modal.png -------------------------------------------------------------------------------- /images/example_discord.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ilsilfverskiold/ai-personalized-tech-reports-discord/HEAD/images/example_discord.png -------------------------------------------------------------------------------- /images/sources_tech_bot_discord.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ilsilfverskiold/ai-personalized-tech-reports-discord/HEAD/images/sources_tech_bot_discord.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | fastapi>=0.110.0 2 | modal 3 | requests 4 | pydantic 5 | python-dotenv 6 | PyNaCl 7 | aiohttp 8 | llama-index-core 9 | llama-index-llms-gemini 10 | google-generativeai 11 | google-genai 12 | llama-index-llms-openai 13 | openai 14 | pymongo[srv]>=4.6 15 | 16 | -------------------------------------------------------------------------------- /helpers/functions/discord_verify.py: -------------------------------------------------------------------------------- 1 | import nacl.signing 2 | 3 | 4 | def verify_signature(pk_hex: str, sig_hex: str, ts: str, body: bytes) -> bool: 5 | try: 6 | nacl.signing.VerifyKey(bytes.fromhex(pk_hex)).verify(ts.encode() + body, bytes.fromhex(sig_hex)) 7 | return True 8 | except Exception: 9 | return False 10 | 11 | 12 | -------------------------------------------------------------------------------- /helpers/functions/discord_request.py: -------------------------------------------------------------------------------- 1 | import os 2 | from fastapi import Request 3 | from helpers.functions.discord_verify import verify_signature 4 | 5 | async def extract_verified_body(request: Request) -> bytes | None: 6 | sig = request.headers.get("X-Signature-Ed25519") 7 | ts = request.headers.get("X-Signature-Timestamp") 8 | body = await request.body() 9 | pk = os.environ.get("DISCORD_PUBLIC_KEY") 10 | if not (sig and ts and pk and verify_signature(pk, sig, ts, body)): 11 | return None 12 | return body 13 | 14 | 15 | -------------------------------------------------------------------------------- /helpers/functions/count_citations.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 
| 3 | def count_total_citations(assembled: Dict) -> int: 4 | total_count = 0 5 | for category_type in assembled.values(): 6 | if isinstance(category_type, dict): 7 | for category_data in category_type.values(): 8 | if isinstance(category_data, list): 9 | for keyword_obj in category_data: 10 | citations = keyword_obj.get("citations", []) 11 | total_count += len(citations) 12 | elif isinstance(category_data, dict): 13 | for sort_data in category_data.values(): 14 | if isinstance(sort_data, list): 15 | for keyword_obj in sort_data: 16 | citations = keyword_obj.get("citations", []) 17 | total_count += len(citations) 18 | 19 | return total_count 20 | -------------------------------------------------------------------------------- /helpers/functions/data_utils.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List 2 | 3 | def add_profile_keywords(assembled: Dict, profile: Dict) -> None: 4 | """Add user's tracked keywords to the assembled data structure.""" 5 | profile_keywords = profile.get("keywords", []) or [] 6 | if not profile_keywords: 7 | return 8 | 9 | if "keywords" not in assembled: 10 | assembled["keywords"] = {} 11 | 12 | for keyword in profile_keywords: 13 | if keyword: 14 | if "profile" not in assembled["keywords"]: 15 | assembled["keywords"]["profile"] = [] 16 | 17 | keyword_obj = {"keyword": keyword} 18 | assembled["keywords"]["profile"].append(keyword_obj) 19 | 20 | def get_all_keyword_objects(assembled: Dict) -> List[Dict]: 21 | """Extract all keyword objects from the assembled data structure.""" 22 | all_keyword_objects = [] 23 | for category_type in assembled.values(): 24 | if isinstance(category_type, dict): 25 | for category_data in category_type.values(): 26 | if isinstance(category_data, list): 27 | all_keyword_objects.extend(category_data) 28 | elif isinstance(category_data, dict): 29 | for sort_data in category_data.values(): 30 | if isinstance(sort_data, list): 31 | all_keyword_objects.extend(sort_data) 32 | return all_keyword_objects 33 | -------------------------------------------------------------------------------- /helpers/functions/category_utils.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List 2 | from helpers.config.llm_schemas import CATEGORY_MAP 3 | 4 | def find_category_name(input_category: str) -> str: 5 | """Find the normalized category name from input string.""" 6 | input_lower = input_category.lower().strip() 7 | if input_lower in CATEGORY_MAP: 8 | return input_lower 9 | for short_name, long_name in CATEGORY_MAP.items(): 10 | if input_lower == long_name.lower(): 11 | return short_name 12 | return None 13 | 14 | def normalize_profile_categories(profile: Dict, time_period_override: str = None) -> tuple[List[str], List[str], str]: 15 | """Normalize profile categories and return major, minor categories and time period.""" 16 | def normalize_list(raw: List[str]) -> List[str]: 17 | seen, out = set(), [] 18 | for category in raw or []: 19 | category_name = find_category_name(category) 20 | if category_name and category_name not in seen: 21 | seen.add(category_name) 22 | out.append(category_name) 23 | return out 24 | 25 | major = normalize_list(profile.get("major_categories") or []) 26 | minor = normalize_list(profile.get("minor_categories") or []) 27 | 28 | major_set = set(major) 29 | minor = [cat for cat in minor if cat not in major_set] 30 | 31 | if time_period_override: 32 | period = time_period_override.lower() 33 | else: 34 | 
period = (profile.get("time_period") or "weekly").lower() 35 | 36 | return major, minor, period 37 | -------------------------------------------------------------------------------- /helpers/functions/progress_tracker.py: -------------------------------------------------------------------------------- 1 | import threading 2 | import time 3 | from helpers.functions.discord_updates import patch_original 4 | 5 | def start_progress_tracker(application_id: str, token: str, total_keywords: int, facts_done_event): 6 | """Start background thread to send progress updates to Discord.""" 7 | 8 | def progress_tracker(): 9 | time.sleep(15) 10 | if not facts_done_event.is_set(): 11 | patch_original(application_id, token, f"We dig into all of these sources one by one, to drag out what's interesting.") 12 | patch_original(application_id, token, f"Each keyword can have hundreds of sources, so it may take a while.") 13 | time.sleep(30) 14 | if not facts_done_event.is_set(): 15 | patch_original(application_id, token, "You can check back here later.") 16 | time.sleep(30) 17 | if not facts_done_event.is_set(): 18 | patch_original(application_id, token, "We're almost there, remember go do something else.") 19 | time.sleep(30) 20 | if not facts_done_event.is_set(): 21 | patch_original(application_id, token, "Since you're first we are digging for the first time today.") 22 | patch_original(application_id, token, "The first run of the day is always slow for LLM concurrency limits.") 23 | time.sleep(15) 24 | if not facts_done_event.is_set(): 25 | patch_original(application_id, token, "You can check back here later.") 26 | 27 | tracker_thread = threading.Thread(target=progress_tracker) 28 | tracker_thread.daemon = True 29 | tracker_thread.start() 30 | return tracker_thread 31 | -------------------------------------------------------------------------------- /helpers/functions/renumber_citations.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Any 2 | import re 3 | 4 | 5 | def renumber_keywords_and_citations(assembled: Dict) -> Dict: 6 | keyword_counter = 1 7 | for category_type in assembled.values(): 8 | if isinstance(category_type, dict): 9 | for category_data in category_type.values(): 10 | if isinstance(category_data, list): 11 | for keyword_obj in category_data: 12 | if keyword_obj.get("keyword"): 13 | _process_keyword_object(keyword_obj, keyword_counter) 14 | keyword_counter += 1 15 | elif isinstance(category_data, dict): 16 | for sort_data in category_data.values(): 17 | if isinstance(sort_data, list): 18 | for keyword_obj in sort_data: 19 | if keyword_obj.get("keyword"): 20 | _process_keyword_object(keyword_obj, keyword_counter) 21 | keyword_counter += 1 22 | return assembled 23 | 24 | 25 | def _process_keyword_object(keyword_obj: Dict, keyword_num: int) -> None: 26 | keyword_obj["keyword_number"] = keyword_num 27 | 28 | citations = keyword_obj.get("citations", []) 29 | if not citations: 30 | return 31 | 32 | old_to_new_citation = {} 33 | for i, citation in enumerate(citations, 1): 34 | old_n = citation.get("n") 35 | if old_n: 36 | new_citation_num = f"{keyword_num}:{i}" 37 | old_to_new_citation[old_n] = new_citation_num 38 | citation["n"] = new_citation_num 39 | 40 | summary = keyword_obj.get("summary", "") 41 | if summary: 42 | updated_summary = _update_inline_citations(summary, old_to_new_citation) 43 | keyword_obj["summary"] = updated_summary 44 | 45 | interesting = keyword_obj.get("interesting", []) 46 | if interesting: 47 | 
updated_interesting = [] 48 | for item in interesting: 49 | updated_item = _update_inline_citations(item, old_to_new_citation) 50 | updated_interesting.append(updated_item) 51 | keyword_obj["interesting"] = updated_interesting 52 | 53 | def _update_inline_citations(text: str, citation_mapping: Dict[int, str]) -> str: 54 | def replace_citation(match): 55 | old_num = int(match.group(1)) 56 | new_citation = citation_mapping.get(old_num) 57 | if new_citation: 58 | return f"[{new_citation}]" 59 | return match.group(0) 60 | 61 | updated_text = re.sub(r'\[(\d+)\]', replace_citation, text) 62 | return updated_text 63 | -------------------------------------------------------------------------------- /helpers/functions/llm_runner.py: -------------------------------------------------------------------------------- 1 | import os 2 | import traceback 3 | from typing import Type, Any 4 | import json 5 | from llama_index.llms.gemini import Gemini 6 | from llama_index.core.program import LLMTextCompletionProgram 7 | from llama_index.llms.openai import OpenAI as OpenAILLM 8 | 9 | def run_llm_structured( 10 | prompt_template: str, 11 | output_cls: Type[Any], 12 | variables: dict, 13 | model: str | None = None, 14 | provider: str = "gemini", 15 | retries: int = 2, 16 | system_template: str | None = None 17 | ): 18 | last_error: Exception | None = None 19 | for attempt in range(retries + 1): 20 | try: 21 | if provider == "gemini": 22 | api_key = os.environ.get("GOOGLE_API_KEY") 23 | llm = Gemini( 24 | api_key=api_key, 25 | model=model or "models/gemini-2.5-flash", 26 | temperature=0.2, 27 | max_tokens=1024, 28 | ) 29 | elif provider == "openai": 30 | api_key = os.environ.get("OPENAI_API_KEY") 31 | llm_kwargs = { 32 | "api_key": api_key, 33 | "model": model or "gpt-4o-mini", 34 | 'json_mode': True, 35 | "temperature": 0.2, 36 | "max_tokens": 1024, 37 | } 38 | llm = OpenAILLM(**llm_kwargs) 39 | else: 40 | raise ValueError(f"Unknown provider: {provider}") 41 | 42 | formatted_prompt = system_template + "\n\n" + prompt_template.format(**variables) 43 | 44 | print(f"LLM formatted prompt: {formatted_prompt}") 45 | program_kwargs = { 46 | "output_cls": output_cls, 47 | "prompt_template_str": formatted_prompt, 48 | "llm": llm, 49 | } 50 | 51 | program = LLMTextCompletionProgram.from_defaults(**program_kwargs) 52 | result = program() 53 | 54 | if isinstance(result, output_cls): 55 | return result 56 | if isinstance(result, dict): 57 | return output_cls(**result) 58 | if isinstance(result, str): 59 | try: 60 | data = json.loads(result) 61 | except Exception: 62 | start = result.find("{") 63 | end = result.rfind("}") 64 | if start != -1 and end != -1 and end > start: 65 | data = json.loads(result[start : end + 1]) 66 | else: 67 | raise ValueError("Model output is not valid JSON string") 68 | return output_cls(**data) 69 | 70 | raise TypeError(f"Unexpected program output type: {type(result)}") 71 | except Exception as e: 72 | last_error = e 73 | print(f"run_llm_structured attempt {attempt + 1} failed: {e}") 74 | print(traceback.format_exc()) 75 | continue 76 | raise last_error if last_error else RuntimeError("Unknown error in run_llm_structured") 77 | 78 | 79 | -------------------------------------------------------------------------------- /helpers/functions/openai_runner.py: -------------------------------------------------------------------------------- 1 | from typing import Type 2 | from pydantic import BaseModel 3 | import json, os, traceback 4 | from openai import OpenAI 5 | 6 | def run_openai_structured( 7 | 
system_prompt: str, 8 | user_prompt: str, 9 | output_cls: Type[BaseModel], 10 | model: str = "gpt-5", 11 | retries: int = 2, 12 | reasoning_effort: str = "medium", 13 | ): 14 | client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY")) 15 | last_error = None 16 | 17 | for attempt in range(retries + 1): 18 | try: 19 | print(f"OpenAI attempt {attempt + 1}/{retries + 1}") 20 | 21 | schema_dict = output_cls.model_json_schema() 22 | schema_str = json.dumps(schema_dict, indent=2) 23 | 24 | response = client.responses.create( 25 | model=model, 26 | instructions=( 27 | system_prompt 28 | + f"\n\nYou must respond with valid json that matches this exact schema:\n{schema_str}." 29 | ), 30 | input=f"{user_prompt}. Return a json object that matches the provided schema exactly. No prose.", 31 | text={"format": {"type": "json_object"}, "verbosity": "medium"}, 32 | reasoning={"effort": reasoning_effort}, 33 | tools=[], 34 | include=["reasoning.encrypted_content"], 35 | ) 36 | 37 | content = getattr(response, "output_text", None) 38 | if not content: 39 | content = "" 40 | for item in getattr(response, "output", []) or []: 41 | for block in getattr(item, "content", []) or []: 42 | if getattr(block, "type", "") in ("output_text", "input_text"): 43 | content = getattr(block, "text", "") or "" 44 | if content: 45 | break 46 | if content: 47 | break 48 | 49 | if not content: 50 | raise ValueError("No textual content returned by Responses API.") 51 | 52 | try: 53 | data = json.loads(content) 54 | except json.JSONDecodeError: 55 | start, end = content.find("{"), content.rfind("}") 56 | if start == -1 or end == -1 or end <= start: 57 | raise 58 | data = json.loads(content[start : end + 1]) 59 | 60 | try: 61 | return output_cls(**data) 62 | except Exception as validation_error: 63 | print(f"Pydantic validation failed: {validation_error}") 64 | raise validation_error 65 | 66 | except Exception as e: 67 | last_error = e 68 | print(f"OpenAI attempt {attempt + 1} failed: {e}") 69 | if "validation" in str(e).lower(): 70 | print(f"This appears to be a schema mismatch - check field names in Pydantic model") 71 | print(traceback.format_exc()) 72 | continue 73 | 74 | print(f"All OpenAI attempts failed. 
Last error: {last_error}") 75 | return None 76 | -------------------------------------------------------------------------------- /helpers/functions/send_profile_to_db.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Any, Optional 2 | from helpers.functions.llm_runner import run_llm_structured 3 | from helpers.config.llm_schemas import ProfileNotesResponse, PROMPT_PROFILE_NOTES 4 | from helpers.functions.discord_updates import patch_original 5 | import traceback 6 | import os 7 | from pymongo import MongoClient 8 | from datetime import datetime, timezone 9 | 10 | def _get_mongo_collection() -> Any: 11 | uri = os.environ.get("MONGO_DB_URI") 12 | if not uri: 13 | raise RuntimeError("MONGO_DB_URI not set") 14 | client = MongoClient(uri) 15 | db_name = "Discord" 16 | db = client[db_name] 17 | return db["user_profiles"] 18 | 19 | def send_profile_to_db(profile_data: Dict, application_id: Optional[str] = None, token: Optional[str] = None) -> None: 20 | try: 21 | notes: ProfileNotesResponse = run_llm_structured( 22 | prompt_template="The user's profile summary: {summary}", 23 | variables={"summary": profile_data.get('summary')}, 24 | output_cls=ProfileNotesResponse, 25 | model='models/gemini-2.5-flash', 26 | provider='gemini', 27 | system_template=PROMPT_PROFILE_NOTES, 28 | ) 29 | user_id = profile_data.get("user_id") 30 | doc = { 31 | "user_id": user_id, 32 | "username": profile_data.get("user_name"), 33 | "name": profile_data.get("global_name"), 34 | "user_interests": (profile_data.get("responses", {}) or {}).get("interests"), 35 | "user_keywords_input": (profile_data.get("responses", {}) or {}).get("keywords"), 36 | "user_connecting_keywords": (profile_data.get("responses", {}) or {}).get("connecting_keywords"), 37 | "user_summary_style": (profile_data.get("responses", {}) or {}).get("summary_style"), 38 | "personality": getattr(notes, "personality", None), 39 | "major_categories": getattr(notes, "major_categories", []) or [], 40 | "minor_categories": getattr(notes, "minor_categories", []) or [], 41 | "keywords": getattr(notes, "keywords", []) or [], 42 | "time_period": getattr(notes, "time_period", None), 43 | "concise_summaries": getattr(notes, "concise_summaries", False), 44 | } 45 | 46 | try: 47 | col = _get_mongo_collection() 48 | col.update_one( 49 | {"user_id": user_id}, 50 | { 51 | "$set": {**doc, "updated_at": datetime.now(timezone.utc)}, 52 | "$setOnInsert": {"created_at": datetime.now(timezone.utc)}, 53 | }, 54 | upsert=True, 55 | ) 56 | if application_id and token: 57 | patch_original(application_id, token, "**Profile setup complete.** Your personalized profile is ready. 
Try `/news` whenever you want.") 58 | 59 | except Exception as db_err: 60 | print("MongoDB error:", db_err) 61 | print(traceback.format_exc()) 62 | if application_id and token: 63 | patch_original(application_id, token, "**Profile setup failed.** Please try again later.") 64 | except Exception as e: 65 | print("send_profile_to_db error:", e) 66 | print(traceback.format_exc()) 67 | 68 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | # Discord Bot specific 132 | # Secrets and API keys 133 | *.env 134 | .env.* 135 | secrets.json 136 | config.json 137 | bot_config.py 138 | 139 | # Logs 140 | *.log 141 | logs/ 142 | 143 | # Database files 144 | *.db 145 | *.sqlite 146 | *.sqlite3 147 | 148 | # Modal specific 149 | .modal/ 150 | 151 | # IDE and editor files 152 | .vscode/ 153 | .idea/ 154 | .cursor/ 155 | *.swp 156 | *.swo 157 | *~ 158 | 159 | # OS generated files 160 | .DS_Store 161 | .DS_Store? 
162 | ._* 163 | .Spotlight-V100 164 | .Trashes 165 | ehthumbs.db 166 | Thumbs.db 167 | 168 | # Temporary files 169 | *.tmp 170 | *.temp 171 | temp/ 172 | tmp/ 173 | 174 | # Node modules (if any) 175 | node_modules/ 176 | 177 | # Backup files 178 | *.bak 179 | *.backup 180 | *.old 181 | 182 | # Archive files 183 | *.zip 184 | *.tar.gz 185 | *.rar 186 | *.7z 187 | 188 | # Images that might contain sensitive info 189 | screenshots/ 190 | *.screenshot.* 191 | 192 | # Local development files 193 | local_* 194 | dev_* 195 | test_* 196 | -------------------------------------------------------------------------------- /helpers/functions/llm_summary.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | from helpers.functions.openai_runner import run_openai_structured 3 | from helpers.config.llm_schemas import AnalysisResponse, SummaryResponse, PROMPT_ANALYSIS_SYSTEM, PROMPT_THEME_SUMMARY_SYSTEM 4 | 5 | def analyze_themes(formatted_data: str, profile: Dict, time_period: str) -> AnalysisResponse | None: 6 | """Analyze keyword data to identify relevant themes using medium reasoning.""" 7 | try: 8 | system_variables = { 9 | "name": profile.get("name") or profile.get("username", ""), 10 | "personality": profile.get("personality", ""), 11 | "user_interests": profile.get("interests", ""), 12 | "time_period": time_period 13 | } 14 | 15 | system_prompt = PROMPT_ANALYSIS_SYSTEM.format(**system_variables) 16 | user_prompt = f"Analyze this keyword data and identify the most relevant themes: {formatted_data}" 17 | 18 | analysis_result: AnalysisResponse = run_openai_structured( 19 | system_prompt=system_prompt, 20 | user_prompt=user_prompt, 21 | output_cls=AnalysisResponse, 22 | model="gpt-5", 23 | reasoning_effort="medium" 24 | ) 25 | 26 | print(f"Analysis result: {len(analysis_result.themes)} themes identified") 27 | for theme in analysis_result.themes: 28 | print(f"Theme: {theme.title} (Score: {theme.relevance}/10)") 29 | return analysis_result 30 | except Exception as e: 31 | print(f"Theme analysis failed: {e}") 32 | return None 33 | 34 | def generate_summary_from_analysis(analysis: AnalysisResponse, formatted_data: str, profile: Dict, time_period: str) -> SummaryResponse | None: 35 | """Generate summaries from theme analysis using high reasoning.""" 36 | try: 37 | system_variables = { 38 | "name": profile.get("name") or profile.get("username", ""), 39 | "personality": profile.get("personality", ""), 40 | "user_interests": profile.get("interests", ""), 41 | "concise_summaries": profile.get("concise_summaries", False), 42 | "time_period": time_period 43 | } 44 | 45 | system_prompt = PROMPT_THEME_SUMMARY_SYSTEM.format(**system_variables) 46 | themes_text = "\n".join([ 47 | f"Theme: {theme.title} (Relevance: {theme.relevance}/10)\n" 48 | f"Key points: {', '.join(theme.key_points)}\n" 49 | f"Keywords: {', '.join(theme.supporting_keywords)}\n" 50 | for theme in analysis.themes 51 | ]) 52 | 53 | user_prompt = f"""Based on this theme analysis: 54 | {themes_text} 55 | 56 | Overall focus: {analysis.overall_focus} 57 | Priority reasoning: {analysis.user_priority_reasoning} 58 | 59 | And this full data: {formatted_data} 60 | 61 | Write comprehensive long_summary and concise_summary focusing on the identified themes. Keep citations intact [n:n] format. 
Return a title too.""" 62 | 63 | summary_result: SummaryResponse = run_openai_structured( 64 | system_prompt=system_prompt, 65 | user_prompt=user_prompt, 66 | output_cls=SummaryResponse, 67 | model="gpt-5", 68 | reasoning_effort="medium" 69 | ) 70 | 71 | return summary_result 72 | except Exception as e: 73 | print(f"Summary generation from analysis failed: {e}") 74 | return None 75 | -------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- 1 | import os, json, modal 2 | from fastapi import Request, Response, BackgroundTasks 3 | from helpers.functions.send_profile_to_db import send_profile_to_db 4 | from helpers.functions.discord_request import extract_verified_body 5 | from helpers.commands.news import run_news_updates 6 | from helpers.commands.setup import ( 7 | handle_setup_command, 8 | handle_setup_button_interaction, 9 | handle_modal_submission, 10 | ) 11 | 12 | APP_NAME = "modal-webhook-echo" 13 | SECRET_NAME = "safron-bot" 14 | 15 | def create_image(): 16 | return modal.Image.debian_slim().pip_install( 17 | "fastapi>=0.110", 18 | "requests", 19 | "pydantic", 20 | "PyNaCl", 21 | "aiohttp", 22 | "llama-index-core", 23 | "llama-index-llms-gemini", 24 | "google-generativeai", 25 | "google-genai", 26 | "llama-index-llms-openai", 27 | "openai", 28 | "pymongo[srv]>=4.6" 29 | ).add_local_python_source("helpers") 30 | 31 | app = modal.App(APP_NAME, secrets=[modal.Secret.from_name(SECRET_NAME)]) 32 | image = create_image() 33 | 34 | @app.function(image=image, cpu=0.125, scaledown_window=300, min_containers=1, timeout=900, secrets=[modal.Secret.from_name(SECRET_NAME)]) 35 | @modal.fastapi_endpoint(method="POST") 36 | async def discord_interactions(request: Request, background_tasks: BackgroundTasks): 37 | body = await extract_verified_body(request) 38 | if body is None: 39 | return Response(status_code=401) 40 | 41 | data = json.loads(body.decode("utf-8")) 42 | t = data.get("type") 43 | 44 | if t == 1: 45 | return {"type": 1} 46 | 47 | if t == 2: 48 | cmd = data.get("data", {}).get("name") 49 | user = (data.get("member") or {}).get("user") or {} 50 | user_id, user_name, global_name = user.get("id"), user.get("username"), user.get("global_name") 51 | 52 | if cmd == "setup": 53 | return await handle_setup_command(data, user_id, user_name, global_name) 54 | 55 | if cmd == "news": 56 | application_id = data.get("application_id") 57 | token = data.get("token") 58 | channel_id = data.get("channel_id") 59 | options = data.get("data", {}).get("options", []) 60 | 61 | time_period_override = None 62 | for option in options: 63 | if option.get("name") == "time_period": 64 | time_period_override = option.get("value") 65 | break 66 | 67 | if application_id and token and user_id: 68 | background_tasks.add_task(run_news_updates, application_id, token, user_id, time_period_override, channel_id) 69 | return {"type": 5, "data": {"flags": 64}} 70 | 71 | return {"type": 4, "data": {"content": "Unknown command", "flags": 64}} 72 | 73 | if t == 3: 74 | resp = await handle_setup_button_interaction(data) 75 | return resp or {"type": 6} 76 | 77 | if t == 5: 78 | try: 79 | result = await handle_modal_submission(data) 80 | if not result: 81 | return {"type": 6} 82 | response_payload, profile_data = result 83 | application_id = data.get("application_id") 84 | token = data.get("token") 85 | background_tasks.add_task(send_profile_to_db, profile_data, application_id, token) 86 | return response_payload 87 | 
except Exception as e: 88 | print(f"Error in modal submission: {e}") 89 | return {"type": 4, "data": {"content": "An error occurred processing your submission.", "flags": 64}} 90 | 91 | return Response(status_code=200) 92 | 93 | -------------------------------------------------------------------------------- /helpers/functions/api_utils.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import re 3 | import time 4 | from typing import Dict, List 5 | 6 | def fetch_keywords(period: str, api_category_name: str, sort: str) -> List[Dict]: 7 | """Fetch keywords from Safron API for a specific category and sort type.""" 8 | try: 9 | url = "https://public.api.safron.io/v2/keywords" 10 | params = {"period": period, "category": api_category_name, "sort": sort, "slim": "false"} 11 | r = requests.get(url, params=params, timeout=45) 12 | r.raise_for_status() 13 | data = r.json() or {} 14 | keywords = [] 15 | for item in data.get("keywords", []): 16 | if item.get("keyword"): 17 | keyword_data = { 18 | "keyword": item.get("keyword"), 19 | "trending": item.get("trending", False), 20 | "count": item.get("count", 0), 21 | "change_in_count": item.get("change_in_count", 0), 22 | "engagement": item.get("engagement", 0), 23 | "change_in_engagement": item.get("change_in_engagement", 0), 24 | "sentiment": item.get("sentiment", {}) 25 | } 26 | keywords.append(keyword_data) 27 | return keywords 28 | except Exception: 29 | import traceback 30 | print("Keyword fetch failed:", period, api_category_name, sort) 31 | print(traceback.format_exc()) 32 | return [] 33 | 34 | def fetch_keyword_facts(keyword: str, period: str, max_retries: int = 3) -> Dict: 35 | """Fetch facts for a specific keyword from Safron AI facts API.""" 36 | for attempt in range(max_retries): 37 | try: 38 | print(f"Fetching facts for: {keyword} (attempt {attempt + 1}/{max_retries})") 39 | url = "https://public.api.safron.io/v2/ai-keyword-facts" 40 | payload = {"keywords": keyword, "period": period} 41 | r = requests.post(url, json=payload, timeout=60) 42 | r.raise_for_status() 43 | data = r.json() or {} 44 | 45 | summary = data.get("summary", "") 46 | interesting = data.get("interesting", []) 47 | all_citations = data.get("citations", []) 48 | 49 | if not summary and not interesting: 50 | facts = data.get("facts", []) 51 | if facts: 52 | if len(facts) >= 3: 53 | summary = " ".join(facts[:3]) 54 | interesting = facts[3:] 55 | elif len(facts) >= 1: 56 | summary = " ".join(facts) 57 | interesting = [] 58 | 59 | referenced_citations = set() 60 | 61 | for match in re.finditer(r'\[(\d+)\]', summary): 62 | referenced_citations.add(int(match.group(1))) 63 | 64 | for item in interesting: 65 | for match in re.finditer(r'\[(\d+)\]', item): 66 | referenced_citations.add(int(match.group(1))) 67 | 68 | filtered_citations = [ 69 | citation for citation in all_citations 70 | if citation.get("n") in referenced_citations 71 | ] 72 | 73 | return { 74 | "summary": summary, 75 | "interesting": interesting, 76 | "citations": filtered_citations 77 | } 78 | except Exception as e: 79 | print(f"Keyword facts fetch failed: {keyword} (attempt {attempt + 1}/{max_retries}) - {e}") 80 | if attempt < max_retries - 1: 81 | print(f"Retrying {keyword} in 2 seconds...") 82 | time.sleep(2) 83 | else: 84 | import traceback 85 | print(traceback.format_exc()) 86 | 87 | return {} 88 | -------------------------------------------------------------------------------- /helpers/functions/process_citations.py: 
-------------------------------------------------------------------------------- 1 | import re 2 | from typing import Dict, List, Tuple 3 | 4 | def process_citations_in_summaries(concise_summary: str, long_summary: str, assembled_data: Dict) -> Tuple[str, str, List[Dict], List[Dict]]: 5 | cleaned_concise, concise_citations = process_single_summary(concise_summary, assembled_data) 6 | cleaned_long, long_citations = process_single_summary(long_summary, assembled_data) 7 | 8 | return cleaned_concise, cleaned_long, concise_citations, long_citations 9 | 10 | def process_single_summary(summary: str, assembled_data: Dict) -> Tuple[str, List[Dict]]: 11 | citation_pattern = r'\[(\d+):(\d+)\]' 12 | 13 | groups = [] 14 | matches = list(re.finditer(citation_pattern, summary)) 15 | 16 | if not matches: 17 | return summary, [] 18 | 19 | i = 0 20 | while i < len(matches): 21 | current_group = [matches[i].group(0)] 22 | while i + 1 < len(matches): 23 | current_end = matches[i].end() 24 | next_start = matches[i + 1].start() 25 | 26 | if next_start - current_end <= 1: 27 | i += 1 28 | current_group.append(matches[i].group(0)) 29 | else: 30 | break 31 | 32 | groups.append(current_group) 33 | i += 1 34 | 35 | unique_groups = [] 36 | seen_groups = set() 37 | 38 | for group in groups: 39 | group_key = tuple(sorted(group)) 40 | if group_key not in seen_groups: 41 | unique_groups.append(group) 42 | seen_groups.add(group_key) 43 | 44 | citations_list = [] 45 | group_to_number = {} 46 | 47 | for i, group in enumerate(unique_groups, 1): 48 | group_urls = [] 49 | 50 | for citation_ref in group: 51 | match = re.match(r'\[(\d+):(\d+)\]', citation_ref) 52 | if match: 53 | keyword_num = int(match.group(1)) 54 | citation_num = int(match.group(2)) 55 | keyword_obj = _find_keyword_by_number(assembled_data, keyword_num) 56 | if keyword_obj and "citations" in keyword_obj: 57 | for citation in keyword_obj["citations"]: 58 | if citation.get("n") == f"{keyword_num}:{citation_num}": 59 | url = citation.get("url", "") 60 | if url: 61 | group_urls.append(url) 62 | break 63 | 64 | deduplicated_urls = list(dict.fromkeys(group_urls)) 65 | 66 | citations_list.append({ 67 | "n": i, 68 | "urls": deduplicated_urls 69 | }) 70 | 71 | group_key = tuple(sorted(group)) 72 | group_to_number[group_key] = i 73 | 74 | cleaned_summary = summary 75 | 76 | for group in groups: 77 | group_key = tuple(sorted(group)) 78 | new_number = group_to_number[group_key] 79 | 80 | group_text = ''.join(group) 81 | cleaned_summary = cleaned_summary.replace(group_text, f"[{new_number}]", 1) 82 | 83 | return cleaned_summary, citations_list 84 | 85 | 86 | def _find_keyword_by_number(assembled_data: Dict, keyword_num: int) -> Dict: 87 | for category_type in assembled_data.values(): 88 | if isinstance(category_type, dict): 89 | for category_data in category_type.values(): 90 | if isinstance(category_data, list): 91 | for keyword_obj in category_data: 92 | if keyword_obj.get("keyword_number") == keyword_num: 93 | return keyword_obj 94 | elif isinstance(category_data, dict): 95 | for sort_data in category_data.values(): 96 | if isinstance(sort_data, list): 97 | for keyword_obj in sort_data: 98 | if keyword_obj.get("keyword_number") == keyword_num: 99 | return keyword_obj 100 | return {} 101 | 102 | 103 | def format_citations_for_thread(citations_list: List[Dict], max_citations_per_message: int = 4) -> List[str]: 104 | if not citations_list: 105 | return [] 106 | 107 | messages = [] 108 | current_lines = ["**Sources:**"] 109 | citations_in_current_message = 0 110 | 111 | 
for citation in citations_list: 112 | n = citation["n"] 113 | urls = citation["urls"] 114 | 115 | citation_lines = [] 116 | if urls: 117 | formatted_urls = ", ".join([f"{url}" for url in urls]) 118 | citation_lines.append(f"[{n}] {formatted_urls}") 119 | else: 120 | citation_lines.append(f"[{n}] (source not found)") 121 | 122 | if citations_in_current_message >= max_citations_per_message and len(current_lines) > 1: 123 | messages.append("\n".join(current_lines)) 124 | current_lines = [] 125 | citations_in_current_message = 0 126 | 127 | current_lines.extend(citation_lines) 128 | citations_in_current_message += 1 129 | 130 | if current_lines: 131 | messages.append("\n".join(current_lines)) 132 | 133 | return messages 134 | -------------------------------------------------------------------------------- /helpers/functions/discord_updates.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import json 3 | import os 4 | 5 | def patch_original(application_id: str, token: str, content: str, ephemeral: bool = True) -> int: 6 | url = f"https://discord.com/api/v10/webhooks/{application_id}/{token}/messages/@original" 7 | payload = {"content": content} 8 | if ephemeral: 9 | payload["flags"] = 64 10 | resp = requests.patch(url, json=payload, timeout=10) 11 | return resp.status_code 12 | 13 | 14 | def post_followup(application_id: str, token: str, content: str, ephemeral: bool = True) -> int: 15 | url = f"https://discord.com/api/v10/webhooks/{application_id}/{token}" 16 | payload = {"content": content} 17 | if ephemeral: 18 | payload["flags"] = 64 19 | try: 20 | resp = requests.post(url, json=payload, timeout=10) 21 | resp.raise_for_status() 22 | return resp.status_code 23 | except Exception as e: 24 | print(f"post_followup failed: {e}") 25 | import traceback 26 | print(traceback.format_exc()) 27 | return 500 28 | 29 | def send_followup_get_msg(application_id: str, token: str, content: str) -> dict: 30 | """Send followup message and return full response with message_id and channel_id.""" 31 | url = f"https://discord.com/api/v10/webhooks/{application_id}/{token}?wait=true" 32 | r = requests.post(url, json={"content": content}, timeout=10) 33 | r.raise_for_status() 34 | return r.json() 35 | 36 | def post_channel_message(bot_token: str, channel_id: str, content: str): 37 | url = f"https://discord.com/api/v10/channels/{channel_id}/messages" 38 | headers = {"Authorization": f"Bot {bot_token}"} 39 | r = requests.post(url, headers=headers, json={"content": content}, timeout=10) 40 | r.raise_for_status() 41 | return r.json() 42 | 43 | def create_thread_from_message(bot_token: str, channel_id: str, message_id: str, name: str = "Discussion", auto_archive: int = 1440) -> str: 44 | """Create a thread from a message using bot token.""" 45 | url = f"https://discord.com/api/v10/channels/{channel_id}/messages/{message_id}/threads" 46 | headers = {"Authorization": f"Bot {bot_token}"} 47 | r = requests.post(url, headers=headers, json={"name": name, "auto_archive_duration": auto_archive}, timeout=10) 48 | r.raise_for_status() 49 | return r.json()["id"] 50 | 51 | def bot_post_in_thread(bot_token: str, thread_id: str, content: str) -> str: 52 | url = f"https://discord.com/api/v10/channels/{thread_id}/messages" 53 | headers = {"Authorization": f"Bot {bot_token}"} 54 | r = requests.post(url, headers=headers, json={"content": content, "flags": 4}, timeout=10) 55 | r.raise_for_status() 56 | return r.json()["id"] 57 | 58 | def post_followup_with_thread(application_id: 
str, token: str, content: str, ephemeral: bool = False, citations_list: list = None, username: str = None, channel_id: str = None, summary_title: str = None) -> int: 59 | try: 60 | bot_token = os.environ.get("BOT_TOKEN") 61 | if not bot_token: 62 | print("BOT_TOKEN not found, falling back to regular post") 63 | return post_followup(application_id, token, content[:1900], ephemeral) 64 | 65 | paragraphs = [p.strip() for p in content.split('\n') if p.strip()] 66 | 67 | if not paragraphs: 68 | return post_followup(application_id, token, content[:1900], ephemeral) 69 | 70 | first_paragraph = paragraphs[0] 71 | remaining_paragraphs = paragraphs[1:] 72 | 73 | if channel_id and bot_token: 74 | try: 75 | msg_data = post_channel_message(bot_token, channel_id, first_paragraph) 76 | message_id = msg_data.get("id") 77 | except Exception as e: 78 | print(f"Failed to post original message: {e}") 79 | else: 80 | print(f"Missing channel_id ({channel_id}) or bot_token ({bool(bot_token)}), using followup") 81 | msg_data = send_followup_get_msg(application_id, token, first_paragraph) 82 | message_id = msg_data.get("id") 83 | channel_id = msg_data.get("channel_id") 84 | 85 | if not message_id or not channel_id or not remaining_paragraphs: 86 | return 200 87 | 88 | from datetime import datetime 89 | 90 | if summary_title: 91 | thread_name = f"{summary_title} ({datetime.now().strftime('%m/%d')})" 92 | else: 93 | date_str = datetime.now().strftime("%m/%d") 94 | thread_name = f"{username}'s Summary ({date_str})" 95 | 96 | thread_id = create_thread_from_message(bot_token, channel_id, message_id, thread_name) 97 | 98 | thread_messages = [] 99 | current_message = "" 100 | 101 | for paragraph in remaining_paragraphs: 102 | if paragraph.strip(): 103 | test_message = current_message + ("\n\n" if current_message else "") + paragraph.strip() 104 | if len(test_message) <= 1900: 105 | current_message = test_message 106 | else: 107 | if current_message: 108 | thread_messages.append(current_message) 109 | current_message = paragraph.strip() 110 | 111 | if current_message: 112 | thread_messages.append(current_message) 113 | 114 | for message in thread_messages: 115 | bot_post_in_thread(bot_token, thread_id, message) 116 | 117 | if citations_list: 118 | from helpers.functions.process_citations import format_citations_for_thread 119 | citations_messages = format_citations_for_thread(citations_list) 120 | for citations_text in citations_messages: 121 | if citations_text: 122 | print(f"Posting citations: {citations_text}") 123 | bot_post_in_thread(bot_token, thread_id, citations_text) 124 | return 200 125 | 126 | except Exception as e: 127 | print(f"post_followup_with_thread failed: {e}") 128 | import traceback 129 | print(traceback.format_exc()) -------------------------------------------------------------------------------- /helpers/commands/setup.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | import asyncio 3 | from helpers.functions.send_profile_to_db import send_profile_to_db 4 | 5 | async def handle_setup_command(data: Dict, user_id: str, user_name: str, global_name: str): 6 | about_you = "" 7 | for option in (data.get("data", {}) or {}).get("options", []) or []: 8 | if option.get("name") == "about_you": 9 | about_you = (option.get("value") or "").strip() 10 | 11 | prefill = about_you.replace("\n", " ").replace("|", "/").strip()[:80] 12 | 13 | return { 14 | "type": 4, 15 | "data": { 16 | "content": f" Set up your profile, {global_name or user_name}! 
This will help build news reports that are customized to you.\n\n", 17 | "flags": 64, 18 | "components": [ 19 | { 20 | "type": 1, 21 | "components": [ 22 | { 23 | "type": 2, 24 | "style": 1, 25 | "label": "Open Setup Form", 26 | "custom_id": f"open_setup_modal|{prefill}" if prefill else "open_setup_modal" 27 | } 28 | ] 29 | } 30 | ] 31 | } 32 | } 33 | 34 | async def handle_setup_button_interaction(data: Dict): 35 | custom_id = (data.get("data", {}) or {}).get("custom_id") 36 | if not custom_id or not custom_id.startswith("open_setup_modal"): 37 | return None 38 | 39 | about_you = "" 40 | if "|" in custom_id: 41 | about_you = custom_id.split("|", 1)[1] 42 | 43 | return { 44 | "type": 9, 45 | "data": { 46 | "custom_id": "setup_modal", 47 | "title": "Profile Setup", 48 | "components": [ 49 | { 50 | "type": 1, 51 | "components": [{ 52 | "type": 4, 53 | "custom_id": "interests_input", 54 | "label": "Your work and interests", 55 | "style": 2, 56 | "required": True, 57 | "max_length": 500, 58 | "placeholder": "e.g., tech, AI, startups", 59 | "value": about_you 60 | }] 61 | }, 62 | { 63 | "type": 1, 64 | "components": [{ 65 | "type": 4, 66 | "custom_id": "keywords_input", 67 | "label": "Track keywords (comma-separated)", 68 | "style": 2, 69 | "required": False, 70 | "max_length": 500, 71 | "placeholder": "AI, LLMs, Machine Learning, Google, OpenAI, Elon Musk, etc" 72 | }] 73 | }, 74 | { 75 | "type": 1, 76 | "components": [{ 77 | "type": 4, 78 | "custom_id": "connecting_keywords_input", 79 | "label": "Use connecting keywords? (yes/no)", 80 | "style": 1, 81 | "required": False, 82 | "max_length": 10, 83 | "placeholder": "yes" 84 | }] 85 | }, 86 | { 87 | "type": 1, 88 | "components": [{ 89 | "type": 4, 90 | "custom_id": "summary_style_input", 91 | "label": "Summary style you prefer", 92 | "style": 2, 93 | "required": False, 94 | "max_length": 500, 95 | "placeholder": "Concise bullets / exec summary" 96 | }] 97 | }, 98 | { # 5 99 | "type": 1, 100 | "components": [{ 101 | "type": 4, 102 | "custom_id": "time_period_input", 103 | "label": "Time period (daily/weekly/monthly)", 104 | "style": 1, 105 | "required": True, 106 | "max_length": 20, 107 | "placeholder": "daily" 108 | }] 109 | } 110 | ] 111 | } 112 | } 113 | 114 | 115 | async def handle_modal_submission(data: Dict): 116 | if data.get("data", {}).get("custom_id") != "setup_modal": 117 | return None 118 | 119 | user = (data.get("member") or {}).get("user") or {} 120 | user_id = user.get("id") 121 | user_name = user.get("username") 122 | global_name = user.get("global_name") 123 | if not user_id: 124 | return {"type": 4, "data": {"content": "Could not determine user id.", "flags": 64}} 125 | 126 | comps = data.get("data", {}).get("components", []) 127 | responses = {} 128 | for row in comps: 129 | for c in row.get("components", []): 130 | cid, val = c.get("custom_id"), c.get("value", "").strip() 131 | if cid == "interests_input": responses["interests"] = val 132 | elif cid == "keywords_input": responses["keywords"] = val 133 | elif cid == "summary_style_input": responses["summary_style"] = val 134 | elif cid == "time_period_input": responses["time_period"] = val.title() 135 | elif cid == "connecting_keywords_input": responses["connecting_keywords"] = val.title() 136 | 137 | summary = ( 138 | f"• **Notes from user: ** {responses.get('interests','Not specified')}\n" 139 | f"• **Keywords they want to track: ** {responses.get('keywords','Not specified')}\n" 140 | f"• **Do they want to track connecting Keywords? 
** {responses.get('connecting_keywords','Not specified')}\n" 141 | f"• **Summary Style: ** {responses.get('summary_style','Not specified')}\n" 142 | f"• **Time Period: ** {responses.get('time_period','Not specified')}\n" 143 | ) 144 | 145 | profile_data = { 146 | "user_id": user_id, 147 | "user_name": user_name, 148 | "global_name": global_name, 149 | "responses": responses, 150 | "summary": summary, 151 | } 152 | 153 | response_payload = {"type": 4, "data": {"content": "** We're working on your profile...**", "flags": 64}} 154 | return response_payload, profile_data 155 | 156 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Personalized Discord News Bot 2 | 3 | An AI Discord bot built with Modal (serverless platform) and the [Safron API](https://docs.safron.io/) that provides personalized, synthesized tech news reports using social listening APIs and AI/LLM services. 4 | 5 | The bot processes thousands of posts and comments from tech websites (Reddit, HN, GitHub, tech blogs, X, ArXiv) by preprocessing and caching data, then uses various prompt-chaining strategies to produce an extensive report that should be interesting to a user based on their profile. 6 | 7 | If you want to try the bot without setting this up yourself, see the [Safron Discord server](https://discord.gg/v6BV49DCpp). 8 | 9 | ![Discord Bot Example](images/example_discord.png) 10 | 11 | Everything is cited back to exact sources so the user can vet the information. 12 | 13 | ![Discord Bot Example Sources](images/sources_tech_bot_discord.png) 14 | 15 | **Note:** This is a first version, and a work in progress. 16 | 17 | ## Tools used 18 | 1. **[Modal](https://modal.com/)**: for hosting, set with min_containers=1 to prevent timeouts. 19 | 2. **[Discord](https://discord.com/)**: to run the bot through, with two commands (/setup and /news) that users can run. 20 | 3. **[Safron](https://docs.safron.io/)**: for structured data on the tech scene, using the keywords and ai-facts endpoints to gather data. 21 | 4. **[MongoDB](https://www.mongodb.com/)**: to store user profiles so it's easy for users to run /news without having to repeat themselves. 22 | 5. **LLMs**: [Gemini](https://ai.google.dev/) and [OpenAI GPT](https://platform.openai.com) are used to transform natural language into json inputs, along with GPT-5 at the end that finds themes and summarizes. 23 | 24 | # How to set up 25 | 26 | ## Prerequisites 27 | 28 | **Before setting up this bot, you'll need:** 29 | 30 | - Python 3.13+ 31 | - A Discord application and bot set up via the Developer Portal 32 | - Two commands set up for the bot (/setup (required field about_you) & /news (optional field time_period)); see the registration sketch below. 33 | - Modal account 34 | - MongoDB database 35 | - API keys for AI services (Google & OpenAI) 36 | 37 | 
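The two slash commands are registered against the Discord API rather than in this repo. Below is a minimal registration sketch; the command and option names match what `app.py` and `helpers/commands/setup.py` expect, while the application ID, bot token and descriptions are placeholders you should replace.

```python
# register_commands.py - one-off sketch for registering the /setup and /news
# slash commands against the Discord HTTP API. Run once per application.
import requests

APP_ID = "YOUR_APPLICATION_ID"   # placeholder
BOT_TOKEN = "YOUR_BOT_TOKEN"     # placeholder

commands = [
    {
        "name": "setup",
        "description": "Create your personalized news profile",
        "type": 1,  # chat input (slash) command
        "options": [
            {"type": 3, "name": "about_you", "description": "Your work and interests", "required": True},
        ],
    },
    {
        "name": "news",
        "description": "Get your personalized tech news report",
        "type": 1,
        "options": [
            {"type": 3, "name": "time_period", "description": "daily, weekly or monthly", "required": False},
        ],
    },
]

headers = {"Authorization": f"Bot {BOT_TOKEN}"}
for command in commands:
    # Global command registration endpoint; type 3 options are plain strings.
    r = requests.post(
        f"https://discord.com/api/v10/applications/{APP_ID}/commands",
        headers=headers,
        json=command,
        timeout=10,
    )
    r.raise_for_status()
    print(f"Registered /{command['name']}")
```

### 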
Required Secrets 38 | 39 | The bot requires the following environment variables to be set in Modal for a new secret called "safron-bot": 40 | 41 | #### Discord Secrets 42 | - `DISCORD_PUBLIC_KEY`: Your Discord application's public key (found in General Information) 43 | - `BOT_TOKEN`: Your Discord bot token 44 | 45 | #### Database 46 | - `MONGO_DB_URI`: MongoDB connection string (e.g., `mongodb+srv://username:password@cluster.mongodb.net/`) 47 | 48 | #### AI/LLM Services (Choose one or both) 49 | - `GOOGLE_API_KEY`: Google Gemini API key 50 | - `OPENAI_API_KEY`: OpenAI API key 51 | 52 | ![Secrets to set in Modal](images/secrets_modal.png) 53 | 54 | ## Deployment 55 | 56 | Clone this repository: 57 | 58 | ```bash 59 | git clone https://github.com/ilsilfverskiold/ai-personalized-tech-reports-discord.git 60 | cd ai-personalized-tech-reports-discord 61 | ``` 62 | 63 | Set up your environment 64 | 65 | ```bash 66 | python3 -m venv venv 67 | source venv/bin/activate 68 | ``` 69 | 70 | Install the requirements 71 | 72 | ```bash 73 | pip install -r requirements.txt 74 | ``` 75 | 76 | Deploy the Modal app 77 | 78 | ```bash 79 | modal deploy app.py 80 | ``` 81 | 82 | You'll get a URL here that you need to set as the Interactions Endpoint URL (webhook) in your Discord application. Test the URL, and after this you should be good to go. 83 | 84 | Make sure you have a MongoDB URI set too, or the system won't be able to store the profile data via /setup. 85 | 86 | Each run of the news report costs two GPT-5 calls of around 1-2k tokens each. 87 | 88 | 89 | ## Project Structure 90 | 91 | ``` 92 | ai-personalized-tech-reports-discord/ 93 | ├── app.py # Main application entry point 94 | ├── requirements.txt # Python dependencies 95 | ├── helpers/ 96 | │ ├── commands/ 97 | │ │ ├── setup.py # Setup command handler 98 | │ │ └── news.py # News command handler 99 | │ ├── functions/ 100 | │ │ ├── api_utils.py # API utilities 101 | │ │ ├── discord_*.py # Discord interaction handlers 102 | │ │ ├── llm_*.py # LLM integration 103 | │ │ └── *.py # Various utility functions 104 | │ └── config/ 105 | │ └── llm_schemas.py # Pydantic schemas for LLM responses + system templates 106 | ``` 107 | 108 | ## Dependencies 109 | 110 | - **FastAPI**: Web framework for handling Discord interactions 111 | - **Modal**: Serverless platform for deployment 112 | - **Requests**: HTTP client for the Discord and Safron APIs 113 | - **MongoDB**: Database for storing user profiles 114 | - **LlamaIndex**: LLM framework for AI processing 115 | - **Google Generative AI**: Gemini API integration 116 | - **OpenAI**: OpenAI API integration 117 | - **PyNaCl**: Discord signature verification 118 | 119 | 120 | ## How It Works 121 | 122 | The bot operates through two main commands that work together to deliver personalized news reports: 123 | 124 | ### 🔧 Setup Command (`/setup`) 125 | 126 | **Purpose**: Create a personalized user profile for customized news delivery 127 | 128 | 1. **Profile Creation**: User provides their interests, work background, and keyword preferences 129 | 2. **AI Analysis**: LLM processes the input and categorizes keywords using Safron's category system 130 | 3. **Data Storage**: Profile is saved to MongoDB for future use (see the upsert sketch below) 
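   For reference, this is roughly what the stored profile document and upsert look like. It's a trimmed sketch of what `helpers/functions/send_profile_to_db.py` does; the field values shown are example data, and only a few of the stored fields are included.

   ```python
   # Trimmed sketch of the profile upsert performed by send_profile_to_db.py.
   import os
   from datetime import datetime, timezone
   from pymongo import MongoClient

   client = MongoClient(os.environ["MONGO_DB_URI"])
   profiles = client["Discord"]["user_profiles"]

   doc = {
       "user_id": "123456789012345678",  # Discord user id (example value)
       "username": "example_user",
       "major_categories": ["ai"],       # normalized by the profile-notes LLM call
       "keywords": ["LLMs", "OpenAI"],
       "time_period": "weekly",
   }

   # Upsert keyed on user_id so /setup can be re-run without creating duplicates.
   profiles.update_one(
       {"user_id": doc["user_id"]},
       {
           "$set": {**doc, "updated_at": datetime.now(timezone.utc)},
           "$setOnInsert": {"created_at": datetime.now(timezone.utc)},
       },
       upsert=True,
   )
   ```

131 | 4. 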
**Confirmation**: User receives confirmation that their profile is ready 132 | 133 | ### 📰 News Command (`/news`) 134 | 135 | **Purpose**: Generate personalized news reports based on user profile 136 | 137 | #### Step 1: Profile Retrieval 138 | - Fetch user's saved profile from MongoDB 139 | - If no profile exists, prompt user to run `/setup` first 140 | 141 | #### Step 2: Keyword Discovery 142 | - **Major categories**: Fetch top 3 trending keywords 143 | - **Minor categories**: Fetch top 2 trending keywords 144 | - Use user's preferred time period (daily/weekly/monthly) or command override 145 | - Include any custom keywords specified by the user 146 | 147 | #### Step 3: Data Collection 148 | - Use Safron's `ai-keyword-facts` endpoint to gather detailed information 149 | - **Caching**: First-time keywords may be slow, subsequent calls are fast 150 | - Collect posts, comments, and insights from various tech sources 151 | 152 | #### Step 4: Data Processing 153 | - Assemble and clean collected data 154 | - Assign unique citation numbers for traceability 155 | - Organize information by themes and relevance 156 | 157 | #### Step 5: AI Summarization 158 | - **Theme Analysis**: First LLM identifies key themes and corroborating facts 159 | - **Report Generation**: Second LLM creates two versions: 160 | - **Concise**: Discord-friendly summary with key points 161 | - **Extended**: Detailed report accessible via web link 162 | - Generate thread title and organize citations 163 | 164 | #### Step 6: Delivery 165 | - Post concise summary to Discord as a thread 166 | - Include citation references and source links 167 | - Provide access to extended report via web URL 168 | 169 | ## License 170 | 171 | MIT 172 | -------------------------------------------------------------------------------- /helpers/functions/format_data.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | from helpers.config.llm_schemas import CATEGORY_MAP 3 | 4 | def format_assembled_data(assembled: Dict) -> str: 5 | assembled = _deduplicate_keywords(assembled) 6 | formatted_sections = [] 7 | 8 | if "keywords" in assembled: 9 | category_data = assembled["keywords"] 10 | section_title = "KEYWORDS" 11 | formatted_sections.append(f"\n=== {section_title} ===") 12 | 13 | for category_name, category_content in category_data.items(): 14 | category_display = CATEGORY_MAP.get(category_name, category_name).upper() 15 | formatted_sections.append(f"\n--- {category_display} ---") 16 | 17 | if isinstance(category_content, list): 18 | sorted_keywords = _sort_keywords_trending_first(category_content) 19 | for i, keyword_obj in enumerate(sorted_keywords, 1): 20 | formatted_sections.append(_format_keyword_object(keyword_obj, i)) 21 | 22 | for category_type, category_data in assembled.items(): 23 | if category_type == "keywords": 24 | continue 25 | if not isinstance(category_data, dict): 26 | continue 27 | 28 | section_title = category_type.upper() 29 | formatted_sections.append(f"\n=== {section_title} ===") 30 | 31 | for category_name, category_content in category_data.items(): 32 | category_display = CATEGORY_MAP.get(category_name, category_name).upper() 33 | formatted_sections.append(f"\n--- {category_display} ---") 34 | 35 | if isinstance(category_content, list): 36 | sorted_keywords = _sort_keywords_trending_first(category_content) 37 | for i, keyword_obj in enumerate(sorted_keywords, 1): 38 | formatted_sections.append(_format_keyword_object(keyword_obj, i)) 39 | elif 
isinstance(category_content, dict): 40 | sort_order = ["trending", "top"] 41 | for sort_type in sort_order: 42 | if sort_type in category_content: 43 | keyword_list = category_content[sort_type] 44 | if isinstance(keyword_list, list) and keyword_list: 45 | sort_title = sort_type.upper() 46 | formatted_sections.append(f"\n{sort_title}:") 47 | sorted_keywords = _sort_keywords_trending_first(keyword_list) 48 | for i, keyword_obj in enumerate(sorted_keywords, 1): 49 | formatted_sections.append(_format_keyword_object(keyword_obj, i)) 50 | 51 | return "\n".join(formatted_sections) 52 | 53 | def _deduplicate_keywords(assembled: Dict) -> Dict: 54 | seen_keywords = set() 55 | 56 | priority_order = [ 57 | ("major", "trending"), ("major", "top"), 58 | ("minor", "trending"), 59 | ("keywords", "profile") 60 | ] 61 | 62 | for category_type, sort_type in priority_order: 63 | if category_type in assembled: 64 | if category_type == "keywords": 65 | if sort_type in assembled[category_type]: 66 | unique_keywords = [] 67 | for keyword_obj in assembled[category_type][sort_type]: 68 | keyword = keyword_obj.get("keyword", "").lower() 69 | if keyword and keyword not in seen_keywords: 70 | seen_keywords.add(keyword) 71 | unique_keywords.append(keyword_obj) 72 | assembled[category_type][sort_type] = unique_keywords 73 | else: 74 | for category_name, category_data in assembled[category_type].items(): 75 | if sort_type in category_data: 76 | unique_keywords = [] 77 | for keyword_obj in category_data[sort_type]: 78 | keyword = keyword_obj.get("keyword", "").lower() 79 | if keyword and keyword not in seen_keywords: 80 | seen_keywords.add(keyword) 81 | unique_keywords.append(keyword_obj) 82 | assembled[category_type][category_name][sort_type] = unique_keywords 83 | 84 | return assembled 85 | 86 | def _sort_keywords_trending_first(keyword_list): 87 | trending = [k for k in keyword_list if k.get("trending", False)] 88 | non_trending = [k for k in keyword_list if not k.get("trending", False)] 89 | return trending + non_trending 90 | 91 | def _format_keyword_object(keyword_obj: Dict, index: int) -> str: 92 | keyword = keyword_obj.get("keyword", "") 93 | keyword_num = keyword_obj.get("keyword_number", "") 94 | summary = keyword_obj.get("summary", "") 95 | interesting = keyword_obj.get("interesting", []) 96 | 97 | header_parts = [f"{index}. 
{keyword}"] 98 | if keyword_num: 99 | header_parts.append(f"(#{keyword_num})") 100 | 101 | stats = [] 102 | if keyword_obj.get("trending"): 103 | stats.append("🔥 TRENDING") 104 | if "count" in keyword_obj: 105 | stats.append(f"Count: {keyword_obj['count']}") 106 | if "change_in_count" in keyword_obj: 107 | change = keyword_obj["change_in_count"] 108 | if change > 0: 109 | stats.append(f"↗️ +{change}%") 110 | elif change < 0: 111 | stats.append(f"↘️ {change}%") 112 | if "engagement" in keyword_obj: 113 | stats.append(f"Engagement: {keyword_obj['engagement']}") 114 | if "change_in_engagement" in keyword_obj: 115 | change = keyword_obj["change_in_engagement"] 116 | if change > 0: 117 | stats.append(f"↗️ +{change}%") 118 | elif change < 0: 119 | stats.append(f"↘️ {change}%") 120 | if "sentiment" in keyword_obj and keyword_obj["sentiment"]: 121 | try: 122 | sentiment = keyword_obj["sentiment"] 123 | positive_count = sentiment.get("positive", {}).get("count", 0) 124 | negative_count = sentiment.get("negative", {}).get("count", 0) 125 | total_count = keyword_obj.get("count", 0) 126 | 127 | if total_count > 0: 128 | positive_pct = positive_count / total_count 129 | negative_pct = negative_count / total_count 130 | 131 | if positive_pct > 0.5: 132 | stats.append("😊 Majority Positive") 133 | elif negative_pct > 0.5: 134 | stats.append("😞 Majority Negative") 135 | except: 136 | pass 137 | 138 | if stats: 139 | header_parts.append(f"[{', '.join(stats)}]") 140 | parts = [f"\n{' '.join(header_parts)}"] 141 | if summary: 142 | parts.append(f"Summary: {summary}") 143 | if interesting: 144 | parts.append("Interesting points:") 145 | for point in interesting: 146 | parts.append(f" • {point}") 147 | 148 | return "\n".join(parts) 149 | -------------------------------------------------------------------------------- /helpers/commands/news.py: -------------------------------------------------------------------------------- 1 | import json 2 | import threading 3 | import time 4 | from typing import Dict, Any, List 5 | from concurrent.futures import ThreadPoolExecutor, as_completed 6 | from helpers.functions.send_profile_to_db import _get_mongo_collection 7 | from helpers.functions.discord_updates import patch_original, post_followup_with_thread 8 | from helpers.functions.renumber_citations import renumber_keywords_and_citations 9 | from helpers.functions.format_data import format_assembled_data 10 | from helpers.functions.category_utils import normalize_profile_categories 11 | from helpers.functions.data_utils import add_profile_keywords, get_all_keyword_objects 12 | from helpers.functions.api_utils import fetch_keywords, fetch_keyword_facts 13 | from helpers.functions.process_citations import process_citations_in_summaries 14 | from helpers.functions.count_citations import count_total_citations 15 | from helpers.functions.llm_summary import analyze_themes, generate_summary_from_analysis 16 | from helpers.functions.progress_tracker import start_progress_tracker 17 | from helpers.config.llm_schemas import SummaryResponse 18 | 19 | class NewsSteps: 20 | 21 | def __init__(self, user_id: str): 22 | self.user_id = user_id 23 | 24 | def fetch_user_profile(self) -> Dict: 25 | col = _get_mongo_collection() 26 | return col.find_one({"user_id": self.user_id}) or {} 27 | 28 | def fetching_keywords(self, profile: Dict, time_period_override: str = None) -> tuple[Dict, str]: 29 | major, minor, period = normalize_profile_categories(profile, time_period_override) 30 | assembled = self.assemble_keywords(major, minor, period, 
profile) 31 | add_profile_keywords(assembled, profile) 32 | 33 | return assembled, period 34 | 35 | def assemble_keywords(self, major: List[str], minor: List[str], period: str, profile: Dict) -> Dict: 36 | assembled: Dict[str, Dict] = {"major": {}, "minor": {}} 37 | api_calls = [] 38 | for key in major: 39 | api_calls.append((key, "major", "top", period, key, "top")) 40 | api_calls.append((key, "major", "trending", period, key, "trending")) 41 | for key in minor: 42 | api_calls.append((key, "minor", "trending", period, key, "trending")) 43 | 44 | with ThreadPoolExecutor(max_workers=10) as executor: 45 | future_to_call = { 46 | executor.submit(fetch_keywords, period, category, sort): (key, category_type, sort) 47 | for key, category_type, sort, period, category, sort in api_calls 48 | } 49 | for future in as_completed(future_to_call): 50 | key, category_type, sort = future_to_call[future] 51 | try: 52 | keywords = future.result() 53 | if category_type not in assembled: 54 | assembled[category_type] = {} 55 | if key not in assembled[category_type]: 56 | assembled[category_type][key] = {} 57 | if category_type == "major": 58 | assembled[category_type][key][sort] = keywords[:3] 59 | else: 60 | assembled[category_type][key][sort] = keywords[:2] 61 | except Exception as e: 62 | print(f"API call failed for {key} {sort}: {e}") 63 | 64 | return assembled 65 | 66 | def fetch_facts(self, assembled: Dict, period: str) -> None: 67 | all_keyword_objects = get_all_keyword_objects(assembled) 68 | keywords_list = [obj.get("keyword") for obj in all_keyword_objects if obj.get("keyword")] 69 | 70 | with ThreadPoolExecutor(max_workers=5) as executor: 71 | future_to_keyword = { 72 | executor.submit(fetch_keyword_facts, keyword_obj.get("keyword"), period): keyword_obj 73 | for keyword_obj in all_keyword_objects if keyword_obj.get("keyword") 74 | } 75 | 76 | for future in as_completed(future_to_keyword): 77 | keyword_obj = future_to_keyword[future] 78 | try: 79 | facts_data = future.result() 80 | if facts_data: 81 | keyword_obj["summary"] = facts_data.get("summary", "") 82 | keyword_obj["citations"] = facts_data.get("citations", []) 83 | has_stats = any(key in keyword_obj for key in ["trending", "count", "change_in_count", "engagement"]) 84 | if not has_stats: 85 | keyword_obj["interesting"] = facts_data.get("interesting", []) 86 | except Exception as e: 87 | print(f"Facts fetch failed for {keyword_obj.get('keyword')}: {e}") 88 | 89 | 90 | def generate_summary(self, assembled: Dict, profile: Dict, time_period: str) -> SummaryResponse | None: 91 | try: 92 | formatted_data = format_assembled_data(assembled) 93 | analysis_result = analyze_themes(formatted_data, profile, time_period) 94 | if not analysis_result: 95 | return None 96 | 97 | summary_result = generate_summary_from_analysis(analysis_result, formatted_data, profile, time_period) 98 | return summary_result 99 | 100 | except Exception as e: 101 | print(f"LLM summary generation failed: {e}") 102 | return None 103 | 104 | 105 | def process_and_post_summary(self, summary_result: SummaryResponse, assembled: Dict, profile: Dict, application_id: str, token: str, channel_id: str = None) -> None: 106 | cleaned_concise, cleaned_long, concise_citations, long_citations = process_citations_in_summaries( 107 | summary_result.concise_summary, 108 | summary_result.long_summary, 109 | assembled 110 | ) 111 | 112 | total_citations = count_total_citations(assembled) 113 | 114 | timestamp = int(time.time()) 115 | username = profile.get("name") or profile.get("username", 
"User") 116 | cleaned_concise = cleaned_concise + f"\n\n**We processed {total_citations} comments and posts to generate this report.**" + "\n\n **Note that this summary is built from posts and comments gathered from the web. Always verify.**" 117 | post_followup_with_thread(application_id, token, cleaned_concise, ephemeral=False, citations_list=concise_citations, username=username, channel_id=channel_id, summary_title=summary_result.title) 118 | 119 | print(f"Cleaned long summary:") 120 | print(cleaned_long) 121 | 122 | 123 | def run_news_updates(application_id: str, token: str, user_id: str, time_period_override: str = None, channel_id: str = None) -> None: 124 | try: 125 | patch_original(application_id, token, "Checking your profile...") 126 | steps = NewsSteps(user_id=user_id) 127 | profile = steps.fetch_user_profile() 128 | if not profile: 129 | patch_original(application_id, token, "No profile found. Use /setup first.") 130 | return 131 | 132 | patch_original(application_id, token, "Scouting top & trending keywords...") 133 | assembled, period = steps.fetching_keywords(profile, time_period_override) 134 | 135 | all_keyword_objects = get_all_keyword_objects(assembled) 136 | total_keywords = len([obj for obj in all_keyword_objects if obj.get("keyword")]) 137 | patch_original(application_id, token, f"Let's dig into what people are saying about {total_keywords} keywords we found for you..") 138 | 139 | facts_done = threading.Event() 140 | start_time = time.time() 141 | start_progress_tracker(application_id, token, total_keywords, facts_done) 142 | 143 | steps.fetch_facts(assembled, period) 144 | facts_done.set() 145 | 146 | assembled = renumber_keywords_and_citations(assembled) 147 | patch_original(application_id, token, "Summarizing tons of data. Give us a minute or two. We'll ping you.") 148 | summary_result = steps.generate_summary(assembled, profile, period) 149 | 150 | elapsed_time = int(time.time() - start_time) 151 | 152 | if summary_result: 153 | steps.process_and_post_summary(summary_result, assembled, profile, application_id, token, channel_id) 154 | else: 155 | patch_original(application_id, token, f"The summary via the LLM provider failed. Please contact support.") 156 | 157 | except Exception as e: 158 | import traceback 159 | print("run_news_updates error:", e) 160 | print(traceback.format_exc()) 161 | -------------------------------------------------------------------------------- /helpers/config/llm_schemas.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | from pydantic import BaseModel 3 | 4 | CATEGORY_MAP = { 5 | "subjects": "Subjects", 6 | "companies": "Companies & Organizations", 7 | "ai": "AI Models & Assistants", 8 | "frameworks": "Frameworks & Libraries", 9 | "languages": "Languages & Syntax", 10 | "concepts": "Concepts & Methods", 11 | "tools": "Tools & Services", 12 | "platforms": "Platforms & Search Engines", 13 | "hardware": "Hardware & Systems", 14 | "websites": "Websites & Applications", 15 | "people": "People", 16 | "bucket": "Bucket (other)", 17 | } 18 | 19 | class ProfileNotesResponse(BaseModel): 20 | personality: str 21 | major_categories: List[str] 22 | minor_categories: List[str] 23 | keywords: List[str] 24 | time_period: str 25 | concise_summaries: bool 26 | 27 | PROMPT_PROFILE_NOTES = """ 28 | You are tasked with defining a user persona based on the user's profile summary. 29 | Your job is to: 30 | 1. Pick a short personality description for the user. 31 | 2. 
Select the most relevant categories (major and minor). 32 | 3. Choose keywords the user should track, strictly following the rules below (max 6). 33 | 4. Decide on time period (based only on what the user asks for). 34 | 5. Decide whether the user prefers concise or detailed summaries. 35 | 36 | --- 37 | 38 | Step 1. Personality 39 | - Write a short description of how we should think about the user. 40 | - Examples: 41 | - CMO for non-technical product → "non-technical, skip jargon, focus on product keywords." 42 | - CEO → "only include highly relevant keywords, no technical overload, straight to the point." 43 | - Developer → "technical, interested in detailed developer conversation and technical terms." 44 | 45 | --- 46 | 47 | Step 2. Categories 48 | Choose only from this catalog (with examples on what they usually contain): 49 | 50 | - Companies & Organizations: Meta, Google, Tesla, OpenAI, Nvidia, etc. 51 | - AI Models & Assistants: ChatGPT, Claude, Llama, Gemini, Qwen, DeepSeek, Wan 52 | - People: Elon Musk, Sam Altman, etc. 53 | - Platforms & Search Engines: AWS, Azure, GCP, Docker, Kubernetes, GitHub, Hugging Face, Vercel, Replit 54 | - Websites & Applications: Reddit, YouTube, X/Twitter, Hacker News, LinkedIn, Discord, TikTok, App Store 55 | - Subjects: AI, software development, open source, machine learning, cybersecurity, performance, China, US, EU, regulation, automation, data analysis, lawsuit, tariffs, privacy, security, job market, valuation, layoffs, inflation, etc. 56 | - Tools & Services: Copilot, Cursor, VS Code, ComfyUI, Terraform, Grafana, Airflow, Proxmox 57 | - Frameworks & Libraries: React, Next, Node, LangChain, LlamaIndex, PyTorch, TensorFlow, FastAPI, Django 58 | - Languages & Syntax: Python, JavaScript, TypeScript, Rust, Go, Java, SQL, C, C++ 59 | - Hardware & Systems: Linux, Windows, Android, MacOS, iPhone, iOS, Debian, Raspberry Pi, etc. 60 | - Concepts & Methods: Large Language Models, GPU, API, AGI, RAG, RAM, Loras, embeddings, fine tuning, prompts, algorithms, microservices, etc. 61 | 62 | --- 63 | 64 | Step 2a. 
To help you pick categories: 65 | 66 | Non-technical 67 | - investor → major: companies, subjects, minor: people, ai 68 | - general manager → major: companies, subjects, minor: people, ai 69 | - designer → major: subjects, companies, minor: websites, ai 70 | - product marketer/manager → major: tools, platforms, minor: websites, subjects, ai 71 | - marketing manager (non-technical product) → major: ai, subjects, minor: websites 72 | - CxO → major: companies, subjects, minor: people 73 | - sales → major: companies, subjects, minor: people, websites 74 | 75 | Semi-technical 76 | - marketing manager (technical product) → major: tools, platforms, minor: ai, subjects 77 | - product manager → major: tools, platforms, concepts, minor: ai, subjects 78 | - product marketing manager (technical products) → major: tools, platforms, concepts, minor: ai, subjects 79 | - technical product manager → major: tools, platforms, concepts, minor: ai, subjects 80 | - technical product marketer → major: tools, platforms, concepts, minor: ai, subjects 81 | 82 | Technical 83 | - frontend developer → major: frameworks, tools, platforms, minor: subjects 84 | - backend developer → major: frameworks, tools, platforms, minor: subjects, concepts 85 | - devops → major: platforms, concepts, tools, minor: hardware, frameworks 86 | - it technician → major: hardware, concepts, minor: platforms 87 | 88 | Other 89 | - data scientist → major: ai, concepts, minor: tools, platforms, subjects 90 | - security engineer → major: concepts, platforms, minor: hardware 91 | - researcher → major: ai, concepts, minor: subjects 92 | 93 | --- 94 | 95 | Step 3. Keywords 96 | 97 | Strict Priority Rules: 98 | 1. Always include user-provided keywords. Never ignore them or filter them out. 99 | HOWEVER, please always: 100 | 1. If abbreviated or badly spelled, expand them (LLMs → Large Language Models) and make sure the spelling is correct (low code -> Low Code). 101 | 2. After including the user’s keywords, you may add a few additional ones based on their profile but the max keywords should never exceed 6. 102 | 3. Do not add vague or non-extractable terms like "Market Trends." Stick to concrete keywords people actually mention (e.g. Valuation, Layoffs, Job Market). 103 | 4. Use common sense: 104 | - Non-technical users → skip heavy jargon keywords unless specified. 105 | - Technical users → include relevant frameworks, platforms, and methods. 106 | - CFOs, investors, economists → you can include Valuation, Layoffs, Inflation, Costs, etc. 107 | - Designers → include Figma, Adobe, Canva, Generative Images. 108 | - AI engineers → include Agentic AI, Agents, RAG, Hugging Face. 109 | - Researchers → include Large Language Models, GPU, embeddings, Fine Tuning. 110 | 111 | --- 112 | 113 | Step 4. Time Period 114 | - Only use the time period the user explicitly asks for. 115 | - If one is not provided, use weekly. 116 | 117 | --- 118 | 119 | Step 5. Concise Summaries 120 | - If the user profile suggests they want brevity (investor, CxO, manager) → concise_summaries: true. 121 | - If they prefer detail (developer, researcher) → concise_summaries: false. 
122 | 123 | --- 124 | 125 | Output Format (JSON only) 126 | 127 | { 128 | "personality": "short description", 129 | "major_categories": ["one to three categories"], 130 | "minor_categories": ["one to three categories"], 131 | "keywords": ["3-6 keywords, always including user-provided ones"], 132 | "time_period": "daily | weekly | monthly | quarterly", 133 | "concise_summaries": true | false 134 | } 135 | """ 136 | 137 | class SummaryResponse(BaseModel): 138 | long_summary: str 139 | concise_summary: str 140 | title: str 141 | 142 | PROMPT_SUMMARY_SYSTEM = """ 143 | Your job is to build a news synthesis, with fact finding and analysis, for a person with this profile: 144 | 145 | Name: {name} 146 | Personality: {personality} 147 | User notes: {user_interests} 148 | Wants concise summaries? {concise_summaries} 149 | 150 | You will be given a dump of information for the time period "{time_period}" fetched from our database that we have found to be relevant to the user: some trending keywords and some top keywords, 151 | along with data we have already aggregated to surface what people are saying and the posts that have been shared, with the source numbers for each one. 152 | 153 | Your job is to synthesize all this information into a {time_period} report so the user can get a grasp on what is happening. 154 | Get to the point. 155 | Don’t repeat the dataset—extract patterns, second-order effects, and contrarian takes. 156 | 157 | If you need help: you can first identify 3–5 cross-cutting themes across items, explain “so what” based on the user profile, and pull in what people are discussing (consensus vs skepticism) and why it matters. Build out a story that is easy to follow. 158 | 159 | You may decide to ignore noise that you don't think will be useful for the user profile and would only cause information overload. 160 | 161 | Build a report for the user in no more than 3-4 paragraphs with around 1300 to 1800 characters for the short summary, and 5-7 paragraphs with less than 6000 characters for the long summary. 162 | For each paragraph title, use bold **title:** formatting. 163 | End each report with a few notes in one short paragraph on what this means for them and what to look out for (seeing as you see more information than they do). 164 | For the title, pick a very short phrase of 2-3 words. 165 | 166 | Start the report with one or two sentences on what it is about, and name the user so they know it's their report. 167 | 168 | Remember to keep it to what you think the user will be interested in; never generalize. 169 | 170 | Make sure to keep the citations exactly as is, in [n:n] format (ex. [1:12]), with each fact you use, as we will parse those later. 171 | """ 172 | 173 | class Theme(BaseModel): 174 | title: str 175 | relevance: int 176 | key_points: List[str] 177 | supporting_keywords: List[str] 178 | 179 | class AnalysisResponse(BaseModel): 180 | themes: List[Theme] 181 | overall_focus: str 182 | user_priority_reasoning: str 183 | 184 | PROMPT_ANALYSIS_SYSTEM = """ 185 | Your job is to synthesize data we picked up on tech forums, blogs and social media to identify the most relevant themes for a personalized report on what is going on. 186 | You should cut out noise and identify information important for the user while keeping it entertaining. 187 | 188 | Your task: 189 | 1. Identify 5–7 cross-cutting themes across the data. They should include what people are discussing (consensus vs skepticism).
They should be relevant to the user's profile and to the data itself (don't ignore major happenings). The themes should be about extracting patterns, second-order effects, and contrarian takes. 190 | 2. Rank each theme by relevance to the user (1-10 scale, 10 being most relevant) 191 | 3. For each theme, provide: 192 | - "title": A clear title (2-4 words) 193 | - "relevance": relevance score based on user profile 194 | - "key_points": 5-7 key points that should be covered, with citations kept in the exact format [n:n]. 195 | - "supporting_keywords": keywords from the data that support this theme. 196 | 197 | Don't: 198 | Don’t repeat the dataset as-is or focus on only part of the data. 199 | 200 | Do: 201 | Take in all the data and then decide what is most important. 202 | 203 | Consider the user's: 204 | - Name: {name} 205 | - Personality: {personality} 206 | - Interests: {user_interests} 207 | - Time period preference: {time_period} 208 | 209 | Remember, if they are non-technical or semi-technical, you should not include keywords they won't understand. 210 | Keywords like Kubernetes, Proxmox, and maybe even Docker are not for non-technical people unless they ask for them specifically (i.e. they work in this domain). 211 | Be smart about what you decide to include based on what you think they already know. 212 | 213 | Focus on themes that would be most valuable and interesting to this specific user. 214 | """ 215 | 216 | 217 | PROMPT_THEME_SUMMARY_SYSTEM = """ 218 | Your job is to build a personalized news synthesis based on pre-identified themes for a person with this profile: 219 | 220 | Name: {name} 221 | Personality: {personality} 222 | User notes: {user_interests} 223 | Wants concise summaries? {concise_summaries} 224 | 225 | You will receive: 226 | 1. A theme analysis with ranked themes and key points 227 | 2. Full keyword data for the time period "{time_period}" 228 | 229 | Your task is to write focused, synthesized reports that cover the identified themes in order of relevance. Use data from the key points and the full dataset to build your answer. 230 | Get to the point but don't overload the user with information. 231 | Don't repeat the dataset—extract patterns, second-order effects, and contrarian takes. 232 | 233 | What you should create: 234 | - Short summary: 3-4 body paragraphs with up to 3 themes, 1400-2000 characters 235 | - Long summary: 5-7 body paragraphs with up to 6 themes, less than 7000 characters 236 | - Title: 2-3 words maximum 237 | 238 | Always do this when building the summaries: 239 | - Start the report with one or two sentences in a small introduction paragraph about what it covers, and name the user to make it personal. 240 | - Use bold **title:** formatting for each paragraph title. 241 | - End with a short paragraph on what this means for them and what to look out for. 242 | 243 | Do not: 244 | - Overload the user with information so it becomes incomprehensible. 245 | - Present the data as facts; it is what people are saying on social media, blogs and tech forums. 246 | - Add in themes or information that the user may not be interested in. 247 | 248 | Build around the themes provided; never repeat the data. Instead, focus on the themes and explain "so what" based on the user profile. 249 | Pull in what people are discussing (consensus vs skepticism) and why it matters for this specific user. 250 | Focus only on what the user will find interesting based on the theme analysis. Never generalize.
251 | 252 | Keep citations exactly as provided, in [n:n] format (ex. [1:12]), as we will parse them later. 253 | """ --------------------------------------------------------------------------------
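Note: the prompts above instruct the model to keep citation markers in [n:n] form (e.g. [1:12]) so they can be parsed downstream, presumably by helpers/functions/process_citations.py and helpers/functions/renumber_citations.py, whose implementations are not reproduced above. As a rough illustration only, a minimal sketch of pulling such markers out of a summary string could look like the code below; the helper name, the regex, and the reading of the two numbers as a keyword number plus a citation number are assumptions, not the project's actual implementation.

# Illustrative sketch only: extract [n:n] citation markers from an LLM summary.
# The real parsing lives in helpers/functions/process_citations.py; this regex,
# the function name, and the (keyword_number, citation_number) interpretation
# are assumptions for demonstration purposes.
import re
from typing import List, Tuple

CITATION_PATTERN = re.compile(r"\[(\d+):(\d+)\]")

def extract_citation_markers(summary: str) -> List[Tuple[int, int]]:
    """Return (keyword_number, citation_number) pairs in order of appearance."""
    return [(int(k), int(c)) for k, c in CITATION_PATTERN.findall(summary)]

if __name__ == "__main__":
    text = "GPU demand keeps climbing [1:12], though some expect a cooldown [2:3]."
    print(extract_citation_markers(text))  # -> [(1, 12), (2, 3)]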