├── helpers ├── commands │ ├── __init__.py │ ├── setup.py │ └── news.py ├── functions │ ├── discord_verify.py │ ├── discord_request.py │ ├── count_citations.py │ ├── data_utils.py │ ├── category_utils.py │ ├── progress_tracker.py │ ├── renumber_citations.py │ ├── llm_runner.py │ ├── openai_runner.py │ ├── send_profile_to_db.py │ ├── llm_summary.py │ ├── api_utils.py │ ├── process_citations.py │ ├── discord_updates.py │ └── format_data.py └── config │ └── llm_schemas.py ├── images ├── secrets_modal.png ├── example_discord.png └── sources_tech_bot_discord.png ├── requirements.txt ├── .gitignore ├── app.py └── README.md /helpers/commands/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /images/secrets_modal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ilsilfverskiold/ai-personalized-tech-reports-discord/HEAD/images/secrets_modal.png -------------------------------------------------------------------------------- /images/example_discord.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ilsilfverskiold/ai-personalized-tech-reports-discord/HEAD/images/example_discord.png -------------------------------------------------------------------------------- /images/sources_tech_bot_discord.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ilsilfverskiold/ai-personalized-tech-reports-discord/HEAD/images/sources_tech_bot_discord.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | fastapi>=0.110.0 2 | modal 3 | requests 4 | pydantic 5 | python-dotenv 6 | PyNaCl 7 | aiohttp 8 | llama-index-core 9 | llama-index-llms-gemini 10 | google-generativeai 11 | google-genai 12 | llama-index-llms-openai 13 | openai 14 | pymongo[srv]>=4.6 15 | 16 | -------------------------------------------------------------------------------- /helpers/functions/discord_verify.py: -------------------------------------------------------------------------------- 1 | import nacl.signing 2 | 3 | 4 | def verify_signature(pk_hex: str, sig_hex: str, ts: str, body: bytes) -> bool: 5 | try: 6 | nacl.signing.VerifyKey(bytes.fromhex(pk_hex)).verify(ts.encode() + body, bytes.fromhex(sig_hex)) 7 | return True 8 | except Exception: 9 | return False 10 | 11 | 12 | -------------------------------------------------------------------------------- /helpers/functions/discord_request.py: -------------------------------------------------------------------------------- 1 | import os 2 | from fastapi import Request 3 | from helpers.functions.discord_verify import verify_signature 4 | 5 | async def extract_verified_body(request: Request) -> bytes | None: 6 | sig = request.headers.get("X-Signature-Ed25519") 7 | ts = request.headers.get("X-Signature-Timestamp") 8 | body = await request.body() 9 | pk = os.environ.get("DISCORD_PUBLIC_KEY") 10 | if not (sig and ts and pk and verify_signature(pk, sig, ts, body)): 11 | return None 12 | return body 13 | 14 | 15 | -------------------------------------------------------------------------------- /helpers/functions/count_citations.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 
| 3 | def count_total_citations(assembled: Dict) -> int: 4 | total_count = 0 5 | for category_type in assembled.values(): 6 | if isinstance(category_type, dict): 7 | for category_data in category_type.values(): 8 | if isinstance(category_data, list): 9 | for keyword_obj in category_data: 10 | citations = keyword_obj.get("citations", []) 11 | total_count += len(citations) 12 | elif isinstance(category_data, dict): 13 | for sort_data in category_data.values(): 14 | if isinstance(sort_data, list): 15 | for keyword_obj in sort_data: 16 | citations = keyword_obj.get("citations", []) 17 | total_count += len(citations) 18 | 19 | return total_count 20 | -------------------------------------------------------------------------------- /helpers/functions/data_utils.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List 2 | 3 | def add_profile_keywords(assembled: Dict, profile: Dict) -> None: 4 | """Add user's tracked keywords to the assembled data structure.""" 5 | profile_keywords = profile.get("keywords", []) or [] 6 | if not profile_keywords: 7 | return 8 | 9 | if "keywords" not in assembled: 10 | assembled["keywords"] = {} 11 | 12 | for keyword in profile_keywords: 13 | if keyword: 14 | if "profile" not in assembled["keywords"]: 15 | assembled["keywords"]["profile"] = [] 16 | 17 | keyword_obj = {"keyword": keyword} 18 | assembled["keywords"]["profile"].append(keyword_obj) 19 | 20 | def get_all_keyword_objects(assembled: Dict) -> List[Dict]: 21 | """Extract all keyword objects from the assembled data structure.""" 22 | all_keyword_objects = [] 23 | for category_type in assembled.values(): 24 | if isinstance(category_type, dict): 25 | for category_data in category_type.values(): 26 | if isinstance(category_data, list): 27 | all_keyword_objects.extend(category_data) 28 | elif isinstance(category_data, dict): 29 | for sort_data in category_data.values(): 30 | if isinstance(sort_data, list): 31 | all_keyword_objects.extend(sort_data) 32 | return all_keyword_objects 33 | -------------------------------------------------------------------------------- /helpers/functions/category_utils.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List 2 | from helpers.config.llm_schemas import CATEGORY_MAP 3 | 4 | def find_category_name(input_category: str) -> str: 5 | """Find the normalized category name from input string.""" 6 | input_lower = input_category.lower().strip() 7 | if input_lower in CATEGORY_MAP: 8 | return input_lower 9 | for short_name, long_name in CATEGORY_MAP.items(): 10 | if input_lower == long_name.lower(): 11 | return short_name 12 | return None 13 | 14 | def normalize_profile_categories(profile: Dict, time_period_override: str = None) -> tuple[List[str], List[str], str]: 15 | """Normalize profile categories and return major, minor categories and time period.""" 16 | def normalize_list(raw: List[str]) -> List[str]: 17 | seen, out = set(), [] 18 | for category in raw or []: 19 | category_name = find_category_name(category) 20 | if category_name and category_name not in seen: 21 | seen.add(category_name) 22 | out.append(category_name) 23 | return out 24 | 25 | major = normalize_list(profile.get("major_categories") or []) 26 | minor = normalize_list(profile.get("minor_categories") or []) 27 | 28 | major_set = set(major) 29 | minor = [cat for cat in minor if cat not in major_set] 30 | 31 | if time_period_override: 32 | period = time_period_override.lower() 33 | else: 34 | 
period = (profile.get("time_period") or "weekly").lower() 35 | 36 | return major, minor, period 37 | -------------------------------------------------------------------------------- /helpers/functions/progress_tracker.py: -------------------------------------------------------------------------------- 1 | import threading 2 | import time 3 | from helpers.functions.discord_updates import patch_original 4 | 5 | def start_progress_tracker(application_id: str, token: str, total_keywords: int, facts_done_event): 6 | """Start background thread to send progress updates to Discord.""" 7 | 8 | def progress_tracker(): 9 | time.sleep(15) 10 | if not facts_done_event.is_set(): 11 | patch_original(application_id, token, f"We dig into all of these sources one by one, to drag out what's interesting.") 12 | patch_original(application_id, token, f"Each keyword can have hundreds of sources, so it may take a while.") 13 | time.sleep(30) 14 | if not facts_done_event.is_set(): 15 | patch_original(application_id, token, "You can check back here later.") 16 | time.sleep(30) 17 | if not facts_done_event.is_set(): 18 | patch_original(application_id, token, "We're almost there, remember go do something else.") 19 | time.sleep(30) 20 | if not facts_done_event.is_set(): 21 | patch_original(application_id, token, "Since you're first we are digging for the first time today.") 22 | patch_original(application_id, token, "The first run of the day is always slow for LLM concurrency limits.") 23 | time.sleep(15) 24 | if not facts_done_event.is_set(): 25 | patch_original(application_id, token, "You can check back here later.") 26 | 27 | tracker_thread = threading.Thread(target=progress_tracker) 28 | tracker_thread.daemon = True 29 | tracker_thread.start() 30 | return tracker_thread 31 | -------------------------------------------------------------------------------- /helpers/functions/renumber_citations.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Any 2 | import re 3 | 4 | 5 | def renumber_keywords_and_citations(assembled: Dict) -> Dict: 6 | keyword_counter = 1 7 | for category_type in assembled.values(): 8 | if isinstance(category_type, dict): 9 | for category_data in category_type.values(): 10 | if isinstance(category_data, list): 11 | for keyword_obj in category_data: 12 | if keyword_obj.get("keyword"): 13 | _process_keyword_object(keyword_obj, keyword_counter) 14 | keyword_counter += 1 15 | elif isinstance(category_data, dict): 16 | for sort_data in category_data.values(): 17 | if isinstance(sort_data, list): 18 | for keyword_obj in sort_data: 19 | if keyword_obj.get("keyword"): 20 | _process_keyword_object(keyword_obj, keyword_counter) 21 | keyword_counter += 1 22 | return assembled 23 | 24 | 25 | def _process_keyword_object(keyword_obj: Dict, keyword_num: int) -> None: 26 | keyword_obj["keyword_number"] = keyword_num 27 | 28 | citations = keyword_obj.get("citations", []) 29 | if not citations: 30 | return 31 | 32 | old_to_new_citation = {} 33 | for i, citation in enumerate(citations, 1): 34 | old_n = citation.get("n") 35 | if old_n: 36 | new_citation_num = f"{keyword_num}:{i}" 37 | old_to_new_citation[old_n] = new_citation_num 38 | citation["n"] = new_citation_num 39 | 40 | summary = keyword_obj.get("summary", "") 41 | if summary: 42 | updated_summary = _update_inline_citations(summary, old_to_new_citation) 43 | keyword_obj["summary"] = updated_summary 44 | 45 | interesting = keyword_obj.get("interesting", []) 46 | if interesting: 47 | 
updated_interesting = [] 48 | for item in interesting: 49 | updated_item = _update_inline_citations(item, old_to_new_citation) 50 | updated_interesting.append(updated_item) 51 | keyword_obj["interesting"] = updated_interesting 52 | 53 | def _update_inline_citations(text: str, citation_mapping: Dict[int, str]) -> str: 54 | def replace_citation(match): 55 | old_num = int(match.group(1)) 56 | new_citation = citation_mapping.get(old_num) 57 | if new_citation: 58 | return f"[{new_citation}]" 59 | return match.group(0) 60 | 61 | updated_text = re.sub(r'\[(\d+)\]', replace_citation, text) 62 | return updated_text 63 | -------------------------------------------------------------------------------- /helpers/functions/llm_runner.py: -------------------------------------------------------------------------------- 1 | import os 2 | import traceback 3 | from typing import Type, Any 4 | import json 5 | from llama_index.llms.gemini import Gemini 6 | from llama_index.core.program import LLMTextCompletionProgram 7 | from llama_index.llms.openai import OpenAI as OpenAILLM 8 | 9 | def run_llm_structured( 10 | prompt_template: str, 11 | output_cls: Type[Any], 12 | variables: dict, 13 | model: str | None = None, 14 | provider: str = "gemini", 15 | retries: int = 2, 16 | system_template: str | None = None 17 | ): 18 | last_error: Exception | None = None 19 | for attempt in range(retries + 1): 20 | try: 21 | if provider == "gemini": 22 | api_key = os.environ.get("GOOGLE_API_KEY") 23 | llm = Gemini( 24 | api_key=api_key, 25 | model=model or "models/gemini-2.5-flash", 26 | temperature=0.2, 27 | max_tokens=1024, 28 | ) 29 | elif provider == "openai": 30 | api_key = os.environ.get("OPENAI_API_KEY") 31 | llm_kwargs = { 32 | "api_key": api_key, 33 | "model": model or "gpt-4o-mini", 34 | 'json_mode': True, 35 | "temperature": 0.2, 36 | "max_tokens": 1024, 37 | } 38 | llm = OpenAILLM(**llm_kwargs) 39 | else: 40 | raise ValueError(f"Unknown provider: {provider}") 41 | 42 | formatted_prompt = system_template + "\n\n" + prompt_template.format(**variables) 43 | 44 | print(f"LLM formatted prompt: {formatted_prompt}") 45 | program_kwargs = { 46 | "output_cls": output_cls, 47 | "prompt_template_str": formatted_prompt, 48 | "llm": llm, 49 | } 50 | 51 | program = LLMTextCompletionProgram.from_defaults(**program_kwargs) 52 | result = program() 53 | 54 | if isinstance(result, output_cls): 55 | return result 56 | if isinstance(result, dict): 57 | return output_cls(**result) 58 | if isinstance(result, str): 59 | try: 60 | data = json.loads(result) 61 | except Exception: 62 | start = result.find("{") 63 | end = result.rfind("}") 64 | if start != -1 and end != -1 and end > start: 65 | data = json.loads(result[start : end + 1]) 66 | else: 67 | raise ValueError("Model output is not valid JSON string") 68 | return output_cls(**data) 69 | 70 | raise TypeError(f"Unexpected program output type: {type(result)}") 71 | except Exception as e: 72 | last_error = e 73 | print(f"run_llm_structured attempt {attempt + 1} failed: {e}") 74 | print(traceback.format_exc()) 75 | continue 76 | raise last_error if last_error else RuntimeError("Unknown error in run_llm_structured") 77 | 78 | 79 | -------------------------------------------------------------------------------- /helpers/functions/openai_runner.py: -------------------------------------------------------------------------------- 1 | from typing import Type 2 | from pydantic import BaseModel 3 | import json, os, traceback 4 | from openai import OpenAI 5 | 6 | def run_openai_structured( 7 | 
system_prompt: str, 8 | user_prompt: str, 9 | output_cls: Type[BaseModel], 10 | model: str = "gpt-5", 11 | retries: int = 2, 12 | reasoning_effort: str = "medium", 13 | ): 14 | client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY")) 15 | last_error = None 16 | 17 | for attempt in range(retries + 1): 18 | try: 19 | print(f"OpenAI attempt {attempt + 1}/{retries + 1}") 20 | 21 | schema_dict = output_cls.model_json_schema() 22 | schema_str = json.dumps(schema_dict, indent=2) 23 | 24 | response = client.responses.create( 25 | model=model, 26 | instructions=( 27 | system_prompt 28 | + f"\n\nYou must respond with valid json that matches this exact schema:\n{schema_str}." 29 | ), 30 | input=f"{user_prompt}. Return a json object that matches the provided schema exactly. No prose.", 31 | text={"format": {"type": "json_object"}, "verbosity": "medium"}, 32 | reasoning={"effort": reasoning_effort}, 33 | tools=[], 34 | include=["reasoning.encrypted_content"], 35 | ) 36 | 37 | content = getattr(response, "output_text", None) 38 | if not content: 39 | content = "" 40 | for item in getattr(response, "output", []) or []: 41 | for block in getattr(item, "content", []) or []: 42 | if getattr(block, "type", "") in ("output_text", "input_text"): 43 | content = getattr(block, "text", "") or "" 44 | if content: 45 | break 46 | if content: 47 | break 48 | 49 | if not content: 50 | raise ValueError("No textual content returned by Responses API.") 51 | 52 | try: 53 | data = json.loads(content) 54 | except json.JSONDecodeError: 55 | start, end = content.find("{"), content.rfind("}") 56 | if start == -1 or end == -1 or end <= start: 57 | raise 58 | data = json.loads(content[start : end + 1]) 59 | 60 | try: 61 | return output_cls(**data) 62 | except Exception as validation_error: 63 | print(f"Pydantic validation failed: {validation_error}") 64 | raise validation_error 65 | 66 | except Exception as e: 67 | last_error = e 68 | print(f"OpenAI attempt {attempt + 1} failed: {e}") 69 | if "validation" in str(e).lower(): 70 | print(f"This appears to be a schema mismatch - check field names in Pydantic model") 71 | print(traceback.format_exc()) 72 | continue 73 | 74 | print(f"All OpenAI attempts failed. 
Last error: {last_error}") 75 | return None 76 | -------------------------------------------------------------------------------- /helpers/functions/send_profile_to_db.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Any, Optional 2 | from helpers.functions.llm_runner import run_llm_structured 3 | from helpers.config.llm_schemas import ProfileNotesResponse, PROMPT_PROFILE_NOTES 4 | from helpers.functions.discord_updates import patch_original 5 | import traceback 6 | import os 7 | from pymongo import MongoClient 8 | from datetime import datetime, timezone 9 | 10 | def _get_mongo_collection() -> Any: 11 | uri = os.environ.get("MONGO_DB_URI") 12 | if not uri: 13 | raise RuntimeError("MONGO_DB_URI not set") 14 | client = MongoClient(uri) 15 | db_name = "Discord" 16 | db = client[db_name] 17 | return db["user_profiles"] 18 | 19 | def send_profile_to_db(profile_data: Dict, application_id: Optional[str] = None, token: Optional[str] = None) -> None: 20 | try: 21 | notes: ProfileNotesResponse = run_llm_structured( 22 | prompt_template="The user's profile summary: {summary}", 23 | variables={"summary": profile_data.get('summary')}, 24 | output_cls=ProfileNotesResponse, 25 | model='models/gemini-2.5-flash', 26 | provider='gemini', 27 | system_template=PROMPT_PROFILE_NOTES, 28 | ) 29 | user_id = profile_data.get("user_id") 30 | doc = { 31 | "user_id": user_id, 32 | "username": profile_data.get("user_name"), 33 | "name": profile_data.get("global_name"), 34 | "user_interests": (profile_data.get("responses", {}) or {}).get("interests"), 35 | "user_keywords_input": (profile_data.get("responses", {}) or {}).get("keywords"), 36 | "user_connecting_keywords": (profile_data.get("responses", {}) or {}).get("connecting_keywords"), 37 | "user_summary_style": (profile_data.get("responses", {}) or {}).get("summary_style"), 38 | "personality": getattr(notes, "personality", None), 39 | "major_categories": getattr(notes, "major_categories", []) or [], 40 | "minor_categories": getattr(notes, "minor_categories", []) or [], 41 | "keywords": getattr(notes, "keywords", []) or [], 42 | "time_period": getattr(notes, "time_period", None), 43 | "concise_summaries": getattr(notes, "concise_summaries", False), 44 | } 45 | 46 | try: 47 | col = _get_mongo_collection() 48 | col.update_one( 49 | {"user_id": user_id}, 50 | { 51 | "$set": {**doc, "updated_at": datetime.now(timezone.utc)}, 52 | "$setOnInsert": {"created_at": datetime.now(timezone.utc)}, 53 | }, 54 | upsert=True, 55 | ) 56 | if application_id and token: 57 | patch_original(application_id, token, "**Profile setup complete.** Your personalized profile is ready. 
Try `/news` whenever you want.") 58 | 59 | except Exception as db_err: 60 | print("MongoDB error:", db_err) 61 | print(traceback.format_exc()) 62 | if application_id and token: 63 | patch_original(application_id, token, "**Profile setup failed.** Please try again later.") 64 | except Exception as e: 65 | print("send_profile_to_db error:", e) 66 | print(traceback.format_exc()) 67 | 68 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | # Discord Bot specific 132 | # Secrets and API keys 133 | *.env 134 | .env.* 135 | secrets.json 136 | config.json 137 | bot_config.py 138 | 139 | # Logs 140 | *.log 141 | logs/ 142 | 143 | # Database files 144 | *.db 145 | *.sqlite 146 | *.sqlite3 147 | 148 | # Modal specific 149 | .modal/ 150 | 151 | # IDE and editor files 152 | .vscode/ 153 | .idea/ 154 | .cursor/ 155 | *.swp 156 | *.swo 157 | *~ 158 | 159 | # OS generated files 160 | .DS_Store 161 | .DS_Store? 
162 | ._* 163 | .Spotlight-V100 164 | .Trashes 165 | ehthumbs.db 166 | Thumbs.db 167 | 168 | # Temporary files 169 | *.tmp 170 | *.temp 171 | temp/ 172 | tmp/ 173 | 174 | # Node modules (if any) 175 | node_modules/ 176 | 177 | # Backup files 178 | *.bak 179 | *.backup 180 | *.old 181 | 182 | # Archive files 183 | *.zip 184 | *.tar.gz 185 | *.rar 186 | *.7z 187 | 188 | # Images that might contain sensitive info 189 | screenshots/ 190 | *.screenshot.* 191 | 192 | # Local development files 193 | local_* 194 | dev_* 195 | test_* 196 | -------------------------------------------------------------------------------- /helpers/functions/llm_summary.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | from helpers.functions.openai_runner import run_openai_structured 3 | from helpers.config.llm_schemas import AnalysisResponse, SummaryResponse, PROMPT_ANALYSIS_SYSTEM, PROMPT_THEME_SUMMARY_SYSTEM 4 | 5 | def analyze_themes(formatted_data: str, profile: Dict, time_period: str) -> AnalysisResponse | None: 6 | """Analyze keyword data to identify relevant themes using medium reasoning.""" 7 | try: 8 | system_variables = { 9 | "name": profile.get("name") or profile.get("username", ""), 10 | "personality": profile.get("personality", ""), 11 | "user_interests": profile.get("interests", ""), 12 | "time_period": time_period 13 | } 14 | 15 | system_prompt = PROMPT_ANALYSIS_SYSTEM.format(**system_variables) 16 | user_prompt = f"Analyze this keyword data and identify the most relevant themes: {formatted_data}" 17 | 18 | analysis_result: AnalysisResponse = run_openai_structured( 19 | system_prompt=system_prompt, 20 | user_prompt=user_prompt, 21 | output_cls=AnalysisResponse, 22 | model="gpt-5", 23 | reasoning_effort="medium" 24 | ) 25 | 26 | print(f"Analysis result: {len(analysis_result.themes)} themes identified") 27 | for theme in analysis_result.themes: 28 | print(f"Theme: {theme.title} (Score: {theme.relevance}/10)") 29 | return analysis_result 30 | except Exception as e: 31 | print(f"Theme analysis failed: {e}") 32 | return None 33 | 34 | def generate_summary_from_analysis(analysis: AnalysisResponse, formatted_data: str, profile: Dict, time_period: str) -> SummaryResponse | None: 35 | """Generate summaries from theme analysis using high reasoning.""" 36 | try: 37 | system_variables = { 38 | "name": profile.get("name") or profile.get("username", ""), 39 | "personality": profile.get("personality", ""), 40 | "user_interests": profile.get("interests", ""), 41 | "concise_summaries": profile.get("concise_summaries", False), 42 | "time_period": time_period 43 | } 44 | 45 | system_prompt = PROMPT_THEME_SUMMARY_SYSTEM.format(**system_variables) 46 | themes_text = "\n".join([ 47 | f"Theme: {theme.title} (Relevance: {theme.relevance}/10)\n" 48 | f"Key points: {', '.join(theme.key_points)}\n" 49 | f"Keywords: {', '.join(theme.supporting_keywords)}\n" 50 | for theme in analysis.themes 51 | ]) 52 | 53 | user_prompt = f"""Based on this theme analysis: 54 | {themes_text} 55 | 56 | Overall focus: {analysis.overall_focus} 57 | Priority reasoning: {analysis.user_priority_reasoning} 58 | 59 | And this full data: {formatted_data} 60 | 61 | Write comprehensive long_summary and concise_summary focusing on the identified themes. Keep citations intact [n:n] format. 
Return a title too.""" 62 | 63 | summary_result: SummaryResponse = run_openai_structured( 64 | system_prompt=system_prompt, 65 | user_prompt=user_prompt, 66 | output_cls=SummaryResponse, 67 | model="gpt-5", 68 | reasoning_effort="medium" 69 | ) 70 | 71 | return summary_result 72 | except Exception as e: 73 | print(f"Summary generation from analysis failed: {e}") 74 | return None 75 | -------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- 1 | import os, json, modal 2 | from fastapi import Request, Response, BackgroundTasks 3 | from helpers.functions.send_profile_to_db import send_profile_to_db 4 | from helpers.functions.discord_request import extract_verified_body 5 | from helpers.commands.news import run_news_updates 6 | from helpers.commands.setup import ( 7 | handle_setup_command, 8 | handle_setup_button_interaction, 9 | handle_modal_submission, 10 | ) 11 | 12 | APP_NAME = "modal-webhook-echo" 13 | SECRET_NAME = "safron-bot" 14 | 15 | def create_image(): 16 | return modal.Image.debian_slim().pip_install( 17 | "fastapi>=0.110", 18 | "requests", 19 | "pydantic", 20 | "PyNaCl", 21 | "aiohttp", 22 | "llama-index-core", 23 | "llama-index-llms-gemini", 24 | "google-generativeai", 25 | "google-genai", 26 | "llama-index-llms-openai", 27 | "openai", 28 | "pymongo[srv]>=4.6" 29 | ).add_local_python_source("helpers") 30 | 31 | app = modal.App(APP_NAME, secrets=[modal.Secret.from_name(SECRET_NAME)]) 32 | image = create_image() 33 | 34 | @app.function(image=image, cpu=0.125, scaledown_window=300, min_containers=1, timeout=900, secrets=[modal.Secret.from_name(SECRET_NAME)]) 35 | @modal.fastapi_endpoint(method="POST") 36 | async def discord_interactions(request: Request, background_tasks: BackgroundTasks): 37 | body = await extract_verified_body(request) 38 | if body is None: 39 | return Response(status_code=401) 40 | 41 | data = json.loads(body.decode("utf-8")) 42 | t = data.get("type") 43 | 44 | if t == 1: 45 | return {"type": 1} 46 | 47 | if t == 2: 48 | cmd = data.get("data", {}).get("name") 49 | user = (data.get("member") or {}).get("user") or {} 50 | user_id, user_name, global_name = user.get("id"), user.get("username"), user.get("global_name") 51 | 52 | if cmd == "setup": 53 | return await handle_setup_command(data, user_id, user_name, global_name) 54 | 55 | if cmd == "news": 56 | application_id = data.get("application_id") 57 | token = data.get("token") 58 | channel_id = data.get("channel_id") 59 | options = data.get("data", {}).get("options", []) 60 | 61 | time_period_override = None 62 | for option in options: 63 | if option.get("name") == "time_period": 64 | time_period_override = option.get("value") 65 | break 66 | 67 | if application_id and token and user_id: 68 | background_tasks.add_task(run_news_updates, application_id, token, user_id, time_period_override, channel_id) 69 | return {"type": 5, "data": {"flags": 64}} 70 | 71 | return {"type": 4, "data": {"content": "Unknown command", "flags": 64}} 72 | 73 | if t == 3: 74 | resp = await handle_setup_button_interaction(data) 75 | return resp or {"type": 6} 76 | 77 | if t == 5: 78 | try: 79 | result = await handle_modal_submission(data) 80 | if not result: 81 | return {"type": 6} 82 | response_payload, profile_data = result 83 | application_id = data.get("application_id") 84 | token = data.get("token") 85 | background_tasks.add_task(send_profile_to_db, profile_data, application_id, token) 86 | return response_payload 87 | 
except Exception as e: 88 | print(f"Error in modal submission: {e}") 89 | return {"type": 4, "data": {"content": "An error occurred processing your submission.", "flags": 64}} 90 | 91 | return Response(status_code=200) 92 | 93 | -------------------------------------------------------------------------------- /helpers/functions/api_utils.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import re 3 | import time 4 | from typing import Dict, List 5 | 6 | def fetch_keywords(period: str, api_category_name: str, sort: str) -> List[Dict]: 7 | """Fetch keywords from Safron API for a specific category and sort type.""" 8 | try: 9 | url = "https://public.api.safron.io/v2/keywords" 10 | params = {"period": period, "category": api_category_name, "sort": sort, "slim": "false"} 11 | r = requests.get(url, params=params, timeout=45) 12 | r.raise_for_status() 13 | data = r.json() or {} 14 | keywords = [] 15 | for item in data.get("keywords", []): 16 | if item.get("keyword"): 17 | keyword_data = { 18 | "keyword": item.get("keyword"), 19 | "trending": item.get("trending", False), 20 | "count": item.get("count", 0), 21 | "change_in_count": item.get("change_in_count", 0), 22 | "engagement": item.get("engagement", 0), 23 | "change_in_engagement": item.get("change_in_engagement", 0), 24 | "sentiment": item.get("sentiment", {}) 25 | } 26 | keywords.append(keyword_data) 27 | return keywords 28 | except Exception: 29 | import traceback 30 | print("Keyword fetch failed:", period, api_category_name, sort) 31 | print(traceback.format_exc()) 32 | return [] 33 | 34 | def fetch_keyword_facts(keyword: str, period: str, max_retries: int = 3) -> Dict: 35 | """Fetch facts for a specific keyword from Safron AI facts API.""" 36 | for attempt in range(max_retries): 37 | try: 38 | print(f"Fetching facts for: {keyword} (attempt {attempt + 1}/{max_retries})") 39 | url = "https://public.api.safron.io/v2/ai-keyword-facts" 40 | payload = {"keywords": keyword, "period": period} 41 | r = requests.post(url, json=payload, timeout=60) 42 | r.raise_for_status() 43 | data = r.json() or {} 44 | 45 | summary = data.get("summary", "") 46 | interesting = data.get("interesting", []) 47 | all_citations = data.get("citations", []) 48 | 49 | if not summary and not interesting: 50 | facts = data.get("facts", []) 51 | if facts: 52 | if len(facts) >= 3: 53 | summary = " ".join(facts[:3]) 54 | interesting = facts[3:] 55 | elif len(facts) >= 1: 56 | summary = " ".join(facts) 57 | interesting = [] 58 | 59 | referenced_citations = set() 60 | 61 | for match in re.finditer(r'\[(\d+)\]', summary): 62 | referenced_citations.add(int(match.group(1))) 63 | 64 | for item in interesting: 65 | for match in re.finditer(r'\[(\d+)\]', item): 66 | referenced_citations.add(int(match.group(1))) 67 | 68 | filtered_citations = [ 69 | citation for citation in all_citations 70 | if citation.get("n") in referenced_citations 71 | ] 72 | 73 | return { 74 | "summary": summary, 75 | "interesting": interesting, 76 | "citations": filtered_citations 77 | } 78 | except Exception as e: 79 | print(f"Keyword facts fetch failed: {keyword} (attempt {attempt + 1}/{max_retries}) - {e}") 80 | if attempt < max_retries - 1: 81 | print(f"Retrying {keyword} in 2 seconds...") 82 | time.sleep(2) 83 | else: 84 | import traceback 85 | print(traceback.format_exc()) 86 | 87 | return {} 88 | -------------------------------------------------------------------------------- /helpers/functions/process_citations.py: 
-------------------------------------------------------------------------------- 1 | import re 2 | from typing import Dict, List, Tuple 3 | 4 | def process_citations_in_summaries(concise_summary: str, long_summary: str, assembled_data: Dict) -> Tuple[str, str, List[Dict], List[Dict]]: 5 | cleaned_concise, concise_citations = process_single_summary(concise_summary, assembled_data) 6 | cleaned_long, long_citations = process_single_summary(long_summary, assembled_data) 7 | 8 | return cleaned_concise, cleaned_long, concise_citations, long_citations 9 | 10 | def process_single_summary(summary: str, assembled_data: Dict) -> Tuple[str, List[Dict]]: 11 | citation_pattern = r'\[(\d+):(\d+)\]' 12 | 13 | groups = [] 14 | matches = list(re.finditer(citation_pattern, summary)) 15 | 16 | if not matches: 17 | return summary, [] 18 | 19 | i = 0 20 | while i < len(matches): 21 | current_group = [matches[i].group(0)] 22 | while i + 1 < len(matches): 23 | current_end = matches[i].end() 24 | next_start = matches[i + 1].start() 25 | 26 | if next_start - current_end <= 1: 27 | i += 1 28 | current_group.append(matches[i].group(0)) 29 | else: 30 | break 31 | 32 | groups.append(current_group) 33 | i += 1 34 | 35 | unique_groups = [] 36 | seen_groups = set() 37 | 38 | for group in groups: 39 | group_key = tuple(sorted(group)) 40 | if group_key not in seen_groups: 41 | unique_groups.append(group) 42 | seen_groups.add(group_key) 43 | 44 | citations_list = [] 45 | group_to_number = {} 46 | 47 | for i, group in enumerate(unique_groups, 1): 48 | group_urls = [] 49 | 50 | for citation_ref in group: 51 | match = re.match(r'\[(\d+):(\d+)\]', citation_ref) 52 | if match: 53 | keyword_num = int(match.group(1)) 54 | citation_num = int(match.group(2)) 55 | keyword_obj = _find_keyword_by_number(assembled_data, keyword_num) 56 | if keyword_obj and "citations" in keyword_obj: 57 | for citation in keyword_obj["citations"]: 58 | if citation.get("n") == f"{keyword_num}:{citation_num}": 59 | url = citation.get("url", "") 60 | if url: 61 | group_urls.append(url) 62 | break 63 | 64 | deduplicated_urls = list(dict.fromkeys(group_urls)) 65 | 66 | citations_list.append({ 67 | "n": i, 68 | "urls": deduplicated_urls 69 | }) 70 | 71 | group_key = tuple(sorted(group)) 72 | group_to_number[group_key] = i 73 | 74 | cleaned_summary = summary 75 | 76 | for group in groups: 77 | group_key = tuple(sorted(group)) 78 | new_number = group_to_number[group_key] 79 | 80 | group_text = ''.join(group) 81 | cleaned_summary = cleaned_summary.replace(group_text, f"[{new_number}]", 1) 82 | 83 | return cleaned_summary, citations_list 84 | 85 | 86 | def _find_keyword_by_number(assembled_data: Dict, keyword_num: int) -> Dict: 87 | for category_type in assembled_data.values(): 88 | if isinstance(category_type, dict): 89 | for category_data in category_type.values(): 90 | if isinstance(category_data, list): 91 | for keyword_obj in category_data: 92 | if keyword_obj.get("keyword_number") == keyword_num: 93 | return keyword_obj 94 | elif isinstance(category_data, dict): 95 | for sort_data in category_data.values(): 96 | if isinstance(sort_data, list): 97 | for keyword_obj in sort_data: 98 | if keyword_obj.get("keyword_number") == keyword_num: 99 | return keyword_obj 100 | return {} 101 | 102 | 103 | def format_citations_for_thread(citations_list: List[Dict], max_citations_per_message: int = 4) -> List[str]: 104 | if not citations_list: 105 | return [] 106 | 107 | messages = [] 108 | current_lines = ["**Sources:**"] 109 | citations_in_current_message = 0 110 | 111 | 
for citation in citations_list: 112 | n = citation["n"] 113 | urls = citation["urls"] 114 | 115 | citation_lines = [] 116 | if urls: 117 | formatted_urls = ", ".join([f"{url}" for url in urls]) 118 | citation_lines.append(f"[{n}] {formatted_urls}") 119 | else: 120 | citation_lines.append(f"[{n}] (source not found)") 121 | 122 | if citations_in_current_message >= max_citations_per_message and len(current_lines) > 1: 123 | messages.append("\n".join(current_lines)) 124 | current_lines = [] 125 | citations_in_current_message = 0 126 | 127 | current_lines.extend(citation_lines) 128 | citations_in_current_message += 1 129 | 130 | if current_lines: 131 | messages.append("\n".join(current_lines)) 132 | 133 | return messages 134 | -------------------------------------------------------------------------------- /helpers/functions/discord_updates.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import json 3 | import os 4 | 5 | def patch_original(application_id: str, token: str, content: str, ephemeral: bool = True) -> int: 6 | url = f"https://discord.com/api/v10/webhooks/{application_id}/{token}/messages/@original" 7 | payload = {"content": content} 8 | if ephemeral: 9 | payload["flags"] = 64 10 | resp = requests.patch(url, json=payload, timeout=10) 11 | return resp.status_code 12 | 13 | 14 | def post_followup(application_id: str, token: str, content: str, ephemeral: bool = True) -> int: 15 | url = f"https://discord.com/api/v10/webhooks/{application_id}/{token}" 16 | payload = {"content": content} 17 | if ephemeral: 18 | payload["flags"] = 64 19 | try: 20 | resp = requests.post(url, json=payload, timeout=10) 21 | resp.raise_for_status() 22 | return resp.status_code 23 | except Exception as e: 24 | print(f"post_followup failed: {e}") 25 | import traceback 26 | print(traceback.format_exc()) 27 | return 500 28 | 29 | def send_followup_get_msg(application_id: str, token: str, content: str) -> dict: 30 | """Send followup message and return full response with message_id and channel_id.""" 31 | url = f"https://discord.com/api/v10/webhooks/{application_id}/{token}?wait=true" 32 | r = requests.post(url, json={"content": content}, timeout=10) 33 | r.raise_for_status() 34 | return r.json() 35 | 36 | def post_channel_message(bot_token: str, channel_id: str, content: str): 37 | url = f"https://discord.com/api/v10/channels/{channel_id}/messages" 38 | headers = {"Authorization": f"Bot {bot_token}"} 39 | r = requests.post(url, headers=headers, json={"content": content}, timeout=10) 40 | r.raise_for_status() 41 | return r.json() 42 | 43 | def create_thread_from_message(bot_token: str, channel_id: str, message_id: str, name: str = "Discussion", auto_archive: int = 1440) -> str: 44 | """Create a thread from a message using bot token.""" 45 | url = f"https://discord.com/api/v10/channels/{channel_id}/messages/{message_id}/threads" 46 | headers = {"Authorization": f"Bot {bot_token}"} 47 | r = requests.post(url, headers=headers, json={"name": name, "auto_archive_duration": auto_archive}, timeout=10) 48 | r.raise_for_status() 49 | return r.json()["id"] 50 | 51 | def bot_post_in_thread(bot_token: str, thread_id: str, content: str) -> str: 52 | url = f"https://discord.com/api/v10/channels/{thread_id}/messages" 53 | headers = {"Authorization": f"Bot {bot_token}"} 54 | r = requests.post(url, headers=headers, json={"content": content, "flags": 4}, timeout=10) 55 | r.raise_for_status() 56 | return r.json()["id"] 57 | 58 | def post_followup_with_thread(application_id: 
str, token: str, content: str, ephemeral: bool = False, citations_list: list = None, username: str = None, channel_id: str = None, summary_title: str = None) -> int: 59 | try: 60 | bot_token = os.environ.get("BOT_TOKEN") 61 | if not bot_token: 62 | print("BOT_TOKEN not found, falling back to regular post") 63 | return post_followup(application_id, token, content[:1900], ephemeral) 64 | 65 | paragraphs = [p.strip() for p in content.split('\n') if p.strip()] 66 | 67 | if not paragraphs: 68 | return post_followup(application_id, token, content[:1900], ephemeral) 69 | 70 | first_paragraph = paragraphs[0] 71 | remaining_paragraphs = paragraphs[1:] 72 | 73 | if channel_id and bot_token: 74 | try: 75 | msg_data = post_channel_message(bot_token, channel_id, first_paragraph) 76 | message_id = msg_data.get("id") 77 | except Exception as e: 78 | print(f"Failed to post original message: {e}") 79 | else: 80 | print(f"Missing channel_id ({channel_id}) or bot_token ({bool(bot_token)}), using followup") 81 | msg_data = send_followup_get_msg(application_id, token, first_paragraph) 82 | message_id = msg_data.get("id") 83 | channel_id = msg_data.get("channel_id") 84 | 85 | if not message_id or not channel_id or not remaining_paragraphs: 86 | return 200 87 | 88 | from datetime import datetime 89 | 90 | if summary_title: 91 | thread_name = f"{summary_title} ({datetime.now().strftime('%m/%d')})" 92 | else: 93 | date_str = datetime.now().strftime("%m/%d") 94 | thread_name = f"{username}'s Summary ({date_str})" 95 | 96 | thread_id = create_thread_from_message(bot_token, channel_id, message_id, thread_name) 97 | 98 | thread_messages = [] 99 | current_message = "" 100 | 101 | for paragraph in remaining_paragraphs: 102 | if paragraph.strip(): 103 | test_message = current_message + ("\n\n" if current_message else "") + paragraph.strip() 104 | if len(test_message) <= 1900: 105 | current_message = test_message 106 | else: 107 | if current_message: 108 | thread_messages.append(current_message) 109 | current_message = paragraph.strip() 110 | 111 | if current_message: 112 | thread_messages.append(current_message) 113 | 114 | for message in thread_messages: 115 | bot_post_in_thread(bot_token, thread_id, message) 116 | 117 | if citations_list: 118 | from helpers.functions.process_citations import format_citations_for_thread 119 | citations_messages = format_citations_for_thread(citations_list) 120 | for citations_text in citations_messages: 121 | if citations_text: 122 | print(f"Posting citations: {citations_text}") 123 | bot_post_in_thread(bot_token, thread_id, citations_text) 124 | return 200 125 | 126 | except Exception as e: 127 | print(f"post_followup_with_thread failed: {e}") 128 | import traceback 129 | print(traceback.format_exc()) -------------------------------------------------------------------------------- /helpers/commands/setup.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | import asyncio 3 | from helpers.functions.send_profile_to_db import send_profile_to_db 4 | 5 | async def handle_setup_command(data: Dict, user_id: str, user_name: str, global_name: str): 6 | about_you = "" 7 | for option in (data.get("data", {}) or {}).get("options", []) or []: 8 | if option.get("name") == "about_you": 9 | about_you = (option.get("value") or "").strip() 10 | 11 | prefill = about_you.replace("\n", " ").replace("|", "/").strip()[:80] 12 | 13 | return { 14 | "type": 4, 15 | "data": { 16 | "content": f" Set up your profile, {global_name or user_name}! 
This will help build news reports that are customized to you.\n\n", 17 | "flags": 64, 18 | "components": [ 19 | { 20 | "type": 1, 21 | "components": [ 22 | { 23 | "type": 2, 24 | "style": 1, 25 | "label": "Open Setup Form", 26 | "custom_id": f"open_setup_modal|{prefill}" if prefill else "open_setup_modal" 27 | } 28 | ] 29 | } 30 | ] 31 | } 32 | } 33 | 34 | async def handle_setup_button_interaction(data: Dict): 35 | custom_id = (data.get("data", {}) or {}).get("custom_id") 36 | if not custom_id or not custom_id.startswith("open_setup_modal"): 37 | return None 38 | 39 | about_you = "" 40 | if "|" in custom_id: 41 | about_you = custom_id.split("|", 1)[1] 42 | 43 | return { 44 | "type": 9, 45 | "data": { 46 | "custom_id": "setup_modal", 47 | "title": "Profile Setup", 48 | "components": [ 49 | { 50 | "type": 1, 51 | "components": [{ 52 | "type": 4, 53 | "custom_id": "interests_input", 54 | "label": "Your work and interests", 55 | "style": 2, 56 | "required": True, 57 | "max_length": 500, 58 | "placeholder": "e.g., tech, AI, startups", 59 | "value": about_you 60 | }] 61 | }, 62 | { 63 | "type": 1, 64 | "components": [{ 65 | "type": 4, 66 | "custom_id": "keywords_input", 67 | "label": "Track keywords (comma-separated)", 68 | "style": 2, 69 | "required": False, 70 | "max_length": 500, 71 | "placeholder": "AI, LLMs, Machine Learning, Google, OpenAI, Elon Musk, etc" 72 | }] 73 | }, 74 | { 75 | "type": 1, 76 | "components": [{ 77 | "type": 4, 78 | "custom_id": "connecting_keywords_input", 79 | "label": "Use connecting keywords? (yes/no)", 80 | "style": 1, 81 | "required": False, 82 | "max_length": 10, 83 | "placeholder": "yes" 84 | }] 85 | }, 86 | { 87 | "type": 1, 88 | "components": [{ 89 | "type": 4, 90 | "custom_id": "summary_style_input", 91 | "label": "Summary style you prefer", 92 | "style": 2, 93 | "required": False, 94 | "max_length": 500, 95 | "placeholder": "Concise bullets / exec summary" 96 | }] 97 | }, 98 | { # 5 99 | "type": 1, 100 | "components": [{ 101 | "type": 4, 102 | "custom_id": "time_period_input", 103 | "label": "Time period (daily/weekly/monthly)", 104 | "style": 1, 105 | "required": True, 106 | "max_length": 20, 107 | "placeholder": "daily" 108 | }] 109 | } 110 | ] 111 | } 112 | } 113 | 114 | 115 | async def handle_modal_submission(data: Dict): 116 | if data.get("data", {}).get("custom_id") != "setup_modal": 117 | return None 118 | 119 | user = (data.get("member") or {}).get("user") or {} 120 | user_id = user.get("id") 121 | user_name = user.get("username") 122 | global_name = user.get("global_name") 123 | if not user_id: 124 | return {"type": 4, "data": {"content": "Could not determine user id.", "flags": 64}} 125 | 126 | comps = data.get("data", {}).get("components", []) 127 | responses = {} 128 | for row in comps: 129 | for c in row.get("components", []): 130 | cid, val = c.get("custom_id"), c.get("value", "").strip() 131 | if cid == "interests_input": responses["interests"] = val 132 | elif cid == "keywords_input": responses["keywords"] = val 133 | elif cid == "summary_style_input": responses["summary_style"] = val 134 | elif cid == "time_period_input": responses["time_period"] = val.title() 135 | elif cid == "connecting_keywords_input": responses["connecting_keywords"] = val.title() 136 | 137 | summary = ( 138 | f"• **Notes from user: ** {responses.get('interests','Not specified')}\n" 139 | f"• **Keywords they want to track: ** {responses.get('keywords','Not specified')}\n" 140 | f"• **Do they want to track connecting Keywords? 
** {responses.get('connecting_keywords','Not specified')}\n" 141 | f"• **Summary Style: ** {responses.get('summary_style','Not specified')}\n" 142 | f"• **Time Period: ** {responses.get('time_period','Not specified')}\n" 143 | ) 144 | 145 | profile_data = { 146 | "user_id": user_id, 147 | "user_name": user_name, 148 | "global_name": global_name, 149 | "responses": responses, 150 | "summary": summary, 151 | } 152 | 153 | response_payload = {"type": 4, "data": {"content": "** We're working on your profile...**", "flags": 64}} 154 | return response_payload, profile_data 155 | 156 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Personalized Discord News Bot 2 | 3 | An AI Discord bot built with Modal (serverless platform) and the [Safron API](https://docs.safron.io/) that provides personalized, synthesized tech news reports using social listening APIs and AI/LLM services. 4 | 5 | The bot processes thousands of posts and comments from tech websites (Reddit, HN, GitHub, tech blogs, X, ArXiv) by preprocessing and caching data, then uses various prompt-chaining strategies to produce an extensive report that should be interesting to a user based on their profile. 6 | 7 | If you want to try the bot without setting this up yourself, see the [Safron Discord server](https://discord.gg/v6BV49DCpp). 8 | 9 | ![Discord Bot Example](images/example_discord.png) 10 | 11 | Everything is cited back to exact sources so the user can vet the information. 12 | 13 | ![Discord Bot Example Sources](images/sources_tech_bot_discord.png) 14 | 15 | **Note:** This is a first version, and a work in progress. 16 | 17 | ## Tools used 18 | 1. **[Modal](https://modal.com/)**: for hosting, set with min_containers=1 to prevent timeouts. 19 | 2. **[Discord](https://discord.com/)**: to run the bot through, with two commands (/setup and /news) that users can run. 20 | 3. **[Safron](https://docs.safron.io/)**: for structured data on the tech scene, using the keywords and ai-facts endpoints to gather data. 21 | 4. **[MongoDB](https://www.mongodb.com/)**: to store user profiles so it's easy for users to run /news without having to repeat themselves. 22 | 5. **LLMs**: [Gemini](https://ai.google.dev/) and [OpenAI GPT](https://platform.openai.com) are used to transform natural language into json inputs, along with GPT-5 at the end that finds themes and summarizes. 23 | 24 | # How to set up 25 | 26 | ## Prerequisites 27 | 28 | **Before setting up this bot, you'll need:** 29 | 30 | - Python 3.13+ 31 | - A Discord application and bot set up via the Developer Portal 32 | - Two commands set up for the bot (/setup (required field about_you) & /news (optional field time_period)); see the registration sketch below. 33 | - Modal account 34 | - MongoDB database 35 | - API keys for AI services (Google & OpenAI) 36 | 37 | 
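The two slash commands are registered against the Discord API rather than in this repo. Below is a minimal registration sketch; the command and option names match what `app.py` and `helpers/commands/setup.py` expect, while the application ID, bot token and descriptions are placeholders you should replace.

```python
# register_commands.py - one-off sketch for registering the /setup and /news
# slash commands against the Discord HTTP API. Run once per application.
import requests

APP_ID = "YOUR_APPLICATION_ID"   # placeholder
BOT_TOKEN = "YOUR_BOT_TOKEN"     # placeholder

commands = [
    {
        "name": "setup",
        "description": "Create your personalized news profile",
        "type": 1,  # chat input (slash) command
        "options": [
            {"type": 3, "name": "about_you", "description": "Your work and interests", "required": True},
        ],
    },
    {
        "name": "news",
        "description": "Get your personalized tech news report",
        "type": 1,
        "options": [
            {"type": 3, "name": "time_period", "description": "daily, weekly or monthly", "required": False},
        ],
    },
]

headers = {"Authorization": f"Bot {BOT_TOKEN}"}
for command in commands:
    # Global command registration endpoint; type 3 options are plain strings.
    r = requests.post(
        f"https://discord.com/api/v10/applications/{APP_ID}/commands",
        headers=headers,
        json=command,
        timeout=10,
    )
    r.raise_for_status()
    print(f"Registered /{command['name']}")
```

### 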
Required Secrets 38 | 39 | The bot requires the following environment variables to be set in Modal for a new secret called "safron-bot": 40 | 41 | #### Discord Secrets 42 | - `DISCORD_PUBLIC_KEY`: Your Discord application's public key (found in General Information) 43 | - `BOT_TOKEN`: Your Discord bot token 44 | 45 | #### Database 46 | - `MONGO_DB_URI`: MongoDB connection string (e.g., `mongodb+srv://username:password@cluster.mongodb.net/`) 47 | 48 | #### AI/LLM Services (Choose one or both) 49 | - `GOOGLE_API_KEY`: Google Gemini API key 50 | - `OPENAI_API_KEY`: OpenAI API key 51 | 52 | ![Secrets to set in Modal](images/secrets_modal.png) 53 | 54 | ## Deployment 55 | 56 | Clone this repository: 57 | 58 | ```bash 59 | git clone https://github.com/ilsilfverskiold/ai-personalized-tech-reports-discord.git 60 | cd ai-personalized-tech-reports-discord 61 | ``` 62 | 63 | Set up your environment 64 | 65 | ```bash 66 | python3 -m venv venv 67 | source venv/bin/activate 68 | ``` 69 | 70 | Install the requirements 71 | 72 | ```bash 73 | pip install -r requirements.txt 74 | ``` 75 | 76 | Deploy the Modal app 77 | 78 | ```bash 79 | modal deploy app.py 80 | ``` 81 | 82 | You'll get a URL here that you need to set as the Interactions Endpoint URL (webhook) in your Discord application. Test the URL, and after this you should be good to go. 83 | 84 | Make sure you have a MongoDB URI set too, or the system won't be able to store the profile data via /setup. 85 | 86 | Each run of the news report costs two GPT-5 calls of around 1-2k tokens each. 87 | 88 | 89 | ## Project Structure 90 | 91 | ``` 92 | ai-personalized-tech-reports-discord/ 93 | ├── app.py # Main application entry point 94 | ├── requirements.txt # Python dependencies 95 | ├── helpers/ 96 | │ ├── commands/ 97 | │ │ ├── setup.py # Setup command handler 98 | │ │ └── news.py # News command handler 99 | │ ├── functions/ 100 | │ │ ├── api_utils.py # API utilities 101 | │ │ ├── discord_*.py # Discord interaction handlers 102 | │ │ ├── llm_*.py # LLM integration 103 | │ │ └── *.py # Various utility functions 104 | │ └── config/ 105 | │ └── llm_schemas.py # Pydantic schemas for LLM responses + system templates 106 | ``` 107 | 108 | ## Dependencies 109 | 110 | - **FastAPI**: Web framework for handling Discord interactions 111 | - **Modal**: Serverless platform for deployment 112 | - **Requests**: HTTP client for the Discord and Safron APIs 113 | - **MongoDB**: Database for storing user profiles 114 | - **LlamaIndex**: LLM framework for AI processing 115 | - **Google Generative AI**: Gemini API integration 116 | - **OpenAI**: OpenAI API integration 117 | - **PyNaCl**: Discord signature verification 118 | 119 | 120 | ## How It Works 121 | 122 | The bot operates through two main commands that work together to deliver personalized news reports: 123 | 124 | ### 🔧 Setup Command (`/setup`) 125 | 126 | **Purpose**: Create a personalized user profile for customized news delivery 127 | 128 | 1. **Profile Creation**: User provides their interests, work background, and keyword preferences 129 | 2. **AI Analysis**: LLM processes the input and categorizes keywords using Safron's category system 130 | 3. **Data Storage**: Profile is saved to MongoDB for future use (see the upsert sketch below) 
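   For reference, this is roughly what the stored profile document and upsert look like. It's a trimmed sketch of what `helpers/functions/send_profile_to_db.py` does; the field values shown are example data, and only a few of the stored fields are included.

   ```python
   # Trimmed sketch of the profile upsert performed by send_profile_to_db.py.
   import os
   from datetime import datetime, timezone
   from pymongo import MongoClient

   client = MongoClient(os.environ["MONGO_DB_URI"])
   profiles = client["Discord"]["user_profiles"]

   doc = {
       "user_id": "123456789012345678",  # Discord user id (example value)
       "username": "example_user",
       "major_categories": ["ai"],       # normalized by the profile-notes LLM call
       "keywords": ["LLMs", "OpenAI"],
       "time_period": "weekly",
   }

   # Upsert keyed on user_id so /setup can be re-run without creating duplicates.
   profiles.update_one(
       {"user_id": doc["user_id"]},
       {
           "$set": {**doc, "updated_at": datetime.now(timezone.utc)},
           "$setOnInsert": {"created_at": datetime.now(timezone.utc)},
       },
       upsert=True,
   )
   ```

131 | 4. 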
**Confirmation**: User receives confirmation that their profile is ready 132 | 133 | ### 📰 News Command (`/news`) 134 | 135 | **Purpose**: Generate personalized news reports based on user profile 136 | 137 | #### Step 1: Profile Retrieval 138 | - Fetch user's saved profile from MongoDB 139 | - If no profile exists, prompt user to run `/setup` first 140 | 141 | #### Step 2: Keyword Discovery 142 | - **Major categories**: Fetch top 3 trending keywords 143 | - **Minor categories**: Fetch top 2 trending keywords 144 | - Use user's preferred time period (daily/weekly/monthly) or command override 145 | - Include any custom keywords specified by the user 146 | 147 | #### Step 3: Data Collection 148 | - Use Safron's `ai-keyword-facts` endpoint to gather detailed information 149 | - **Caching**: First-time keywords may be slow, subsequent calls are fast 150 | - Collect posts, comments, and insights from various tech sources 151 | 152 | #### Step 4: Data Processing 153 | - Assemble and clean collected data 154 | - Assign unique citation numbers for traceability 155 | - Organize information by themes and relevance 156 | 157 | #### Step 5: AI Summarization 158 | - **Theme Analysis**: First LLM identifies key themes and corroborating facts 159 | - **Report Generation**: Second LLM creates two versions: 160 | - **Concise**: Discord-friendly summary with key points 161 | - **Extended**: Detailed report accessible via web link 162 | - Generate thread title and organize citations 163 | 164 | #### Step 6: Delivery 165 | - Post concise summary to Discord as a thread 166 | - Include citation references and source links 167 | - Provide access to extended report via web URL 168 | 169 | ## License 170 | 171 | MIT 172 | -------------------------------------------------------------------------------- /helpers/functions/format_data.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | from helpers.config.llm_schemas import CATEGORY_MAP 3 | 4 | def format_assembled_data(assembled: Dict) -> str: 5 | assembled = _deduplicate_keywords(assembled) 6 | formatted_sections = [] 7 | 8 | if "keywords" in assembled: 9 | category_data = assembled["keywords"] 10 | section_title = "KEYWORDS" 11 | formatted_sections.append(f"\n=== {section_title} ===") 12 | 13 | for category_name, category_content in category_data.items(): 14 | category_display = CATEGORY_MAP.get(category_name, category_name).upper() 15 | formatted_sections.append(f"\n--- {category_display} ---") 16 | 17 | if isinstance(category_content, list): 18 | sorted_keywords = _sort_keywords_trending_first(category_content) 19 | for i, keyword_obj in enumerate(sorted_keywords, 1): 20 | formatted_sections.append(_format_keyword_object(keyword_obj, i)) 21 | 22 | for category_type, category_data in assembled.items(): 23 | if category_type == "keywords": 24 | continue 25 | if not isinstance(category_data, dict): 26 | continue 27 | 28 | section_title = category_type.upper() 29 | formatted_sections.append(f"\n=== {section_title} ===") 30 | 31 | for category_name, category_content in category_data.items(): 32 | category_display = CATEGORY_MAP.get(category_name, category_name).upper() 33 | formatted_sections.append(f"\n--- {category_display} ---") 34 | 35 | if isinstance(category_content, list): 36 | sorted_keywords = _sort_keywords_trending_first(category_content) 37 | for i, keyword_obj in enumerate(sorted_keywords, 1): 38 | formatted_sections.append(_format_keyword_object(keyword_obj, i)) 39 | elif 
isinstance(category_content, dict): 40 | sort_order = ["trending", "top"] 41 | for sort_type in sort_order: 42 | if sort_type in category_content: 43 | keyword_list = category_content[sort_type] 44 | if isinstance(keyword_list, list) and keyword_list: 45 | sort_title = sort_type.upper() 46 | formatted_sections.append(f"\n{sort_title}:") 47 | sorted_keywords = _sort_keywords_trending_first(keyword_list) 48 | for i, keyword_obj in enumerate(sorted_keywords, 1): 49 | formatted_sections.append(_format_keyword_object(keyword_obj, i)) 50 | 51 | return "\n".join(formatted_sections) 52 | 53 | def _deduplicate_keywords(assembled: Dict) -> Dict: 54 | seen_keywords = set() 55 | 56 | priority_order = [ 57 | ("major", "trending"), ("major", "top"), 58 | ("minor", "trending"), 59 | ("keywords", "profile") 60 | ] 61 | 62 | for category_type, sort_type in priority_order: 63 | if category_type in assembled: 64 | if category_type == "keywords": 65 | if sort_type in assembled[category_type]: 66 | unique_keywords = [] 67 | for keyword_obj in assembled[category_type][sort_type]: 68 | keyword = keyword_obj.get("keyword", "").lower() 69 | if keyword and keyword not in seen_keywords: 70 | seen_keywords.add(keyword) 71 | unique_keywords.append(keyword_obj) 72 | assembled[category_type][sort_type] = unique_keywords 73 | else: 74 | for category_name, category_data in assembled[category_type].items(): 75 | if sort_type in category_data: 76 | unique_keywords = [] 77 | for keyword_obj in category_data[sort_type]: 78 | keyword = keyword_obj.get("keyword", "").lower() 79 | if keyword and keyword not in seen_keywords: 80 | seen_keywords.add(keyword) 81 | unique_keywords.append(keyword_obj) 82 | assembled[category_type][category_name][sort_type] = unique_keywords 83 | 84 | return assembled 85 | 86 | def _sort_keywords_trending_first(keyword_list): 87 | trending = [k for k in keyword_list if k.get("trending", False)] 88 | non_trending = [k for k in keyword_list if not k.get("trending", False)] 89 | return trending + non_trending 90 | 91 | def _format_keyword_object(keyword_obj: Dict, index: int) -> str: 92 | keyword = keyword_obj.get("keyword", "") 93 | keyword_num = keyword_obj.get("keyword_number", "") 94 | summary = keyword_obj.get("summary", "") 95 | interesting = keyword_obj.get("interesting", []) 96 | 97 | header_parts = [f"{index}. 
{keyword}"] 98 | if keyword_num: 99 | header_parts.append(f"(#{keyword_num})") 100 | 101 | stats = [] 102 | if keyword_obj.get("trending"): 103 | stats.append("🔥 TRENDING") 104 | if "count" in keyword_obj: 105 | stats.append(f"Count: {keyword_obj['count']}") 106 | if "change_in_count" in keyword_obj: 107 | change = keyword_obj["change_in_count"] 108 | if change > 0: 109 | stats.append(f"↗️ +{change}%") 110 | elif change < 0: 111 | stats.append(f"↘️ {change}%") 112 | if "engagement" in keyword_obj: 113 | stats.append(f"Engagement: {keyword_obj['engagement']}") 114 | if "change_in_engagement" in keyword_obj: 115 | change = keyword_obj["change_in_engagement"] 116 | if change > 0: 117 | stats.append(f"↗️ +{change}%") 118 | elif change < 0: 119 | stats.append(f"↘️ {change}%") 120 | if "sentiment" in keyword_obj and keyword_obj["sentiment"]: 121 | try: 122 | sentiment = keyword_obj["sentiment"] 123 | positive_count = sentiment.get("positive", {}).get("count", 0) 124 | negative_count = sentiment.get("negative", {}).get("count", 0) 125 | total_count = keyword_obj.get("count", 0) 126 | 127 | if total_count > 0: 128 | positive_pct = positive_count / total_count 129 | negative_pct = negative_count / total_count 130 | 131 | if positive_pct > 0.5: 132 | stats.append("😊 Majority Positive") 133 | elif negative_pct > 0.5: 134 | stats.append("😞 Majority Negative") 135 | except: 136 | pass 137 | 138 | if stats: 139 | header_parts.append(f"[{', '.join(stats)}]") 140 | parts = [f"\n{' '.join(header_parts)}"] 141 | if summary: 142 | parts.append(f"Summary: {summary}") 143 | if interesting: 144 | parts.append("Interesting points:") 145 | for point in interesting: 146 | parts.append(f" • {point}") 147 | 148 | return "\n".join(parts) 149 | -------------------------------------------------------------------------------- /helpers/commands/news.py: -------------------------------------------------------------------------------- 1 | import json 2 | import threading 3 | import time 4 | from typing import Dict, Any, List 5 | from concurrent.futures import ThreadPoolExecutor, as_completed 6 | from helpers.functions.send_profile_to_db import _get_mongo_collection 7 | from helpers.functions.discord_updates import patch_original, post_followup_with_thread 8 | from helpers.functions.renumber_citations import renumber_keywords_and_citations 9 | from helpers.functions.format_data import format_assembled_data 10 | from helpers.functions.category_utils import normalize_profile_categories 11 | from helpers.functions.data_utils import add_profile_keywords, get_all_keyword_objects 12 | from helpers.functions.api_utils import fetch_keywords, fetch_keyword_facts 13 | from helpers.functions.process_citations import process_citations_in_summaries 14 | from helpers.functions.count_citations import count_total_citations 15 | from helpers.functions.llm_summary import analyze_themes, generate_summary_from_analysis 16 | from helpers.functions.progress_tracker import start_progress_tracker 17 | from helpers.config.llm_schemas import SummaryResponse 18 | 19 | class NewsSteps: 20 | 21 | def __init__(self, user_id: str): 22 | self.user_id = user_id 23 | 24 | def fetch_user_profile(self) -> Dict: 25 | col = _get_mongo_collection() 26 | return col.find_one({"user_id": self.user_id}) or {} 27 | 28 | def fetching_keywords(self, profile: Dict, time_period_override: str = None) -> tuple[Dict, str]: 29 | major, minor, period = normalize_profile_categories(profile, time_period_override) 30 | assembled = self.assemble_keywords(major, minor, period, 
profile) 31 | add_profile_keywords(assembled, profile) 32 | 33 | return assembled, period 34 | 35 | def assemble_keywords(self, major: List[str], minor: List[str], period: str, profile: Dict) -> Dict: 36 | assembled: Dict[str, Dict] = {"major": {}, "minor": {}} 37 | api_calls = [] 38 | for key in major: 39 | api_calls.append((key, "major", "top", period, key, "top")) 40 | api_calls.append((key, "major", "trending", period, key, "trending")) 41 | for key in minor: 42 | api_calls.append((key, "minor", "trending", period, key, "trending")) 43 | 44 | with ThreadPoolExecutor(max_workers=10) as executor: 45 | future_to_call = { 46 | executor.submit(fetch_keywords, period, category, sort): (key, category_type, sort) 47 | for key, category_type, sort, period, category, sort in api_calls 48 | } 49 | for future in as_completed(future_to_call): 50 | key, category_type, sort = future_to_call[future] 51 | try: 52 | keywords = future.result() 53 | if category_type not in assembled: 54 | assembled[category_type] = {} 55 | if key not in assembled[category_type]: 56 | assembled[category_type][key] = {} 57 | if category_type == "major": 58 | assembled[category_type][key][sort] = keywords[:3] 59 | else: 60 | assembled[category_type][key][sort] = keywords[:2] 61 | except Exception as e: 62 | print(f"API call failed for {key} {sort}: {e}") 63 | 64 | return assembled 65 | 66 | def fetch_facts(self, assembled: Dict, period: str) -> None: 67 | all_keyword_objects = get_all_keyword_objects(assembled) 68 | keywords_list = [obj.get("keyword") for obj in all_keyword_objects if obj.get("keyword")] 69 | 70 | with ThreadPoolExecutor(max_workers=5) as executor: 71 | future_to_keyword = { 72 | executor.submit(fetch_keyword_facts, keyword_obj.get("keyword"), period): keyword_obj 73 | for keyword_obj in all_keyword_objects if keyword_obj.get("keyword") 74 | } 75 | 76 | for future in as_completed(future_to_keyword): 77 | keyword_obj = future_to_keyword[future] 78 | try: 79 | facts_data = future.result() 80 | if facts_data: 81 | keyword_obj["summary"] = facts_data.get("summary", "") 82 | keyword_obj["citations"] = facts_data.get("citations", []) 83 | has_stats = any(key in keyword_obj for key in ["trending", "count", "change_in_count", "engagement"]) 84 | if not has_stats: 85 | keyword_obj["interesting"] = facts_data.get("interesting", []) 86 | except Exception as e: 87 | print(f"Facts fetch failed for {keyword_obj.get('keyword')}: {e}") 88 | 89 | 90 | def generate_summary(self, assembled: Dict, profile: Dict, time_period: str) -> SummaryResponse | None: 91 | try: 92 | formatted_data = format_assembled_data(assembled) 93 | analysis_result = analyze_themes(formatted_data, profile, time_period) 94 | if not analysis_result: 95 | return None 96 | 97 | summary_result = generate_summary_from_analysis(analysis_result, formatted_data, profile, time_period) 98 | return summary_result 99 | 100 | except Exception as e: 101 | print(f"LLM summary generation failed: {e}") 102 | return None 103 | 104 | 105 | def process_and_post_summary(self, summary_result: SummaryResponse, assembled: Dict, profile: Dict, application_id: str, token: str, channel_id: str = None) -> None: 106 | cleaned_concise, cleaned_long, concise_citations, long_citations = process_citations_in_summaries( 107 | summary_result.concise_summary, 108 | summary_result.long_summary, 109 | assembled 110 | ) 111 | 112 | total_citations = count_total_citations(assembled) 113 | 114 | timestamp = int(time.time()) 115 | username = profile.get("name") or profile.get("username", 
"User") 116 | cleaned_concise = cleaned_concise + f"\n\n**We processed {total_citations} comments and posts to generate this report.**" + "\n\n **Note that this summary is built from posts and comments gathered from the web. Always verify.**" 117 | post_followup_with_thread(application_id, token, cleaned_concise, ephemeral=False, citations_list=concise_citations, username=username, channel_id=channel_id, summary_title=summary_result.title) 118 | 119 | print(f"Cleaned long summary:") 120 | print(cleaned_long) 121 | 122 | 123 | def run_news_updates(application_id: str, token: str, user_id: str, time_period_override: str = None, channel_id: str = None) -> None: 124 | try: 125 | patch_original(application_id, token, "Checking your profile...") 126 | steps = NewsSteps(user_id=user_id) 127 | profile = steps.fetch_user_profile() 128 | if not profile: 129 | patch_original(application_id, token, "No profile found. Use /setup first.") 130 | return 131 | 132 | patch_original(application_id, token, "Scouting top & trending keywords...") 133 | assembled, period = steps.fetching_keywords(profile, time_period_override) 134 | 135 | all_keyword_objects = get_all_keyword_objects(assembled) 136 | total_keywords = len([obj for obj in all_keyword_objects if obj.get("keyword")]) 137 | patch_original(application_id, token, f"Let's dig into what people are saying about {total_keywords} keywords we found for you..") 138 | 139 | facts_done = threading.Event() 140 | start_time = time.time() 141 | start_progress_tracker(application_id, token, total_keywords, facts_done) 142 | 143 | steps.fetch_facts(assembled, period) 144 | facts_done.set() 145 | 146 | assembled = renumber_keywords_and_citations(assembled) 147 | patch_original(application_id, token, "Summarizing tons of data. Give us a minute or two. We'll ping you.") 148 | summary_result = steps.generate_summary(assembled, profile, period) 149 | 150 | elapsed_time = int(time.time() - start_time) 151 | 152 | if summary_result: 153 | steps.process_and_post_summary(summary_result, assembled, profile, application_id, token, channel_id) 154 | else: 155 | patch_original(application_id, token, f"The summary via the LLM provider failed. Please contact support.") 156 | 157 | except Exception as e: 158 | import traceback 159 | print("run_news_updates error:", e) 160 | print(traceback.format_exc()) 161 | -------------------------------------------------------------------------------- /helpers/config/llm_schemas.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | from pydantic import BaseModel 3 | 4 | CATEGORY_MAP = { 5 | "subjects": "Subjects", 6 | "companies": "Companies & Organizations", 7 | "ai": "AI Models & Assistants", 8 | "frameworks": "Frameworks & Libraries", 9 | "languages": "Languages & Syntax", 10 | "concepts": "Concepts & Methods", 11 | "tools": "Tools & Services", 12 | "platforms": "Platforms & Search Engines", 13 | "hardware": "Hardware & Systems", 14 | "websites": "Websites & Applications", 15 | "people": "People", 16 | "bucket": "Bucket (other)", 17 | } 18 | 19 | class ProfileNotesResponse(BaseModel): 20 | personality: str 21 | major_categories: List[str] 22 | minor_categories: List[str] 23 | keywords: List[str] 24 | time_period: str 25 | concise_summaries: bool 26 | 27 | PROMPT_PROFILE_NOTES = """ 28 | You are tasked with defining a user persona based on the user's profile summary. 29 | Your job is to: 30 | 1. Pick a short personality description for the user. 31 | 2. 
Select the most relevant categories (major and minor). 32 | 3. Choose keywords the user should track, strictly following the rules below (max 6). 33 | 4. Decide on time period (based only on what the user asks for). 34 | 5. Decide whether the user prefers concise or detailed summaries. 35 | 36 | --- 37 | 38 | Step 1. Personality 39 | - Write a short description of how we should think about the user. 40 | - Examples: 41 | - CMO for non-technical product → "non-technical, skip jargon, focus on product keywords." 42 | - CEO → "only include highly relevant keywords, no technical overload, straight to the point." 43 | - Developer → "technical, interested in detailed developer conversation and technical terms." 44 | 45 | --- 46 | 47 | Step 2. Categories 48 | Choose only from this catalog (with examples on what they usually contain): 49 | 50 | - Companies & Organizations: Meta, Google, Tesla, OpenAI, Nvidia, etc. 51 | - AI Models & Assistants: ChatGPT, Claude, Llama, Gemini, Qwen, DeepSeek, Wan 52 | - People: Elon Musk, Sam Altman, etc. 53 | - Platforms & Search Engines: AWS, Azure, GCP, Docker, Kubernetes, GitHub, Hugging Face, Vercel, Replit 54 | - Websites & Applications: Reddit, YouTube, X/Twitter, Hacker News, LinkedIn, Discord, TikTok, App Store 55 | - Subjects: AI, software development, open source, machine learning, cybersecurity, performance, China, US, EU, regulation, automation, data analysis, lawsuit, tariffs, privacy, security, job market, valuation, layoffs, inflation, etc. 56 | - Tools & Services: Copilot, Cursor, VS Code, ComfyUI, Terraform, Grafana, Airflow, Proxmox 57 | - Frameworks & Libraries: React, Next, Node, LangChain, LlamaIndex, PyTorch, TensorFlow, FastAPI, Django 58 | - Languages & Syntax: Python, JavaScript, TypeScript, Rust, Go, Java, SQL, C, C++ 59 | - Hardware & Systems: Linux, Windows, Android, MacOS, iPhone, iOS, Debian, Raspberry Pi, etc. 60 | - Concepts & Methods: Large Language Models, GPU, API, AGI, RAG, RAM, Loras, embeddings, fine tuning, prompts, algorithms, microservices, etc. 61 | 62 | --- 63 | 64 | Step 2a. 
To help you pick categories: 65 | 66 | Non-technical 67 | - investor → major: companies, subjects, minor: people, ai 68 | - general manager → major: companies, subjects, minor: people, ai 69 | - designer → major: subjects, companies, minor: websites, ai 70 | - product marketer/manager → major: tools, platforms, minor: websites, subjects, ai 71 | - marketing manager (non-technical product) → major: ai, subjects, minor: websites 72 | - CxO → major: companies, subjects, minor: people 73 | - sales → major: companies, subjects, minor: people, websites 74 | 75 | Semi-technical 76 | - marketing manager (technical product) → major: tools, platforms, minor: ai, subjects 77 | - product manager → major: tools, platforms, concepts, minor: ai, subjects 78 | - product marketing manager (technical products) → major: tools, platforms, concepts, minor: ai, subjects 79 | - technical product manager → major: tools, platforms, concepts, minor: ai, subjects 80 | - technical product marketer → major: tools, platforms, concepts, minor: ai, subjects 81 | 82 | Technical 83 | - frontend developer → major: frameworks, tools, platforms, minor: subjects 84 | - backend developer → major: frameworks, tools, platforms, minor: subjects, concepts 85 | - devops → major: platforms, concepts, tools, minor: hardware, frameworks 86 | - it technician → major: hardware, concepts, minor: platforms 87 | 88 | Other 89 | - data scientist → major: ai, concepts, minor: tools, platforms, subjects 90 | - security engineer → major: concepts, platforms, minor: hardware 91 | - researcher → major: ai, concepts, minor: subjects 92 | 93 | --- 94 | 95 | Step 3. Keywords 96 | 97 | Strict Priority Rules: 98 | 1. Always include user-provided keywords. Never ignore them or filter them out. 99 | HOWEVER, please always: 100 | 1. If abbreviated or badly spelled, expand them (LLMs → Large Language Models) and make sure the spelling is correct (low code -> Low Code). 101 | 2. After including the user’s keywords, you may add a few additional ones based on their profile but the max keywords should never exceed 6. 102 | 3. Do not add vague or non-extractable terms like "Market Trends." Stick to concrete keywords people actually mention (e.g. Valuation, Layoffs, Job Market). 103 | 4. Use common sense: 104 | - Non-technical users → skip heavy jargon keywords unless specified. 105 | - Technical users → include relevant frameworks, platforms, and methods. 106 | - CFOs, investors, economists → you can include Valuation, Layoffs, Inflation, Costs, etc. 107 | - Designers → include Figma, Adobe, Canva, Generative Images. 108 | - AI engineers → include Agentic AI, Agents, RAG, Hugging Face. 109 | - Researchers → include Large Language Models, GPU, embeddings, Fine Tuning. 110 | 111 | --- 112 | 113 | Step 4. Time Period 114 | - Only use the time period the user explicitly asks for. 115 | - If one is not provided, use weekly. 116 | 117 | --- 118 | 119 | Step 5. Concise Summaries 120 | - If the user profile suggests they want brevity (investor, CxO, manager) → concise_summaries: true. 121 | - If they prefer detail (developer, researcher) → concise_summaries: false. 
122 | 123 | --- 124 | 125 | Output Format (JSON only) 126 | 127 | { 128 | "personality": "short description", 129 | "major_categories": ["one to three categories"], 130 | "minor_categories": ["one to three categories"], 131 | "keywords": ["3-6 keywords, always including user-provided ones"], 132 | "time_period": "daily | weekly | monthly | quarterly", 133 | "concise_summaries": true | false 134 | } 135 | """ 136 | 137 | class SummaryResponse(BaseModel): 138 | long_summary: str 139 | concise_summary: str 140 | title: str 141 | 142 | PROMPT_SUMMARY_SYSTEM = """ 143 | Your job is to build a news synthesis, with fact finding and analysis, for a person with this profile: 144 | 145 | Name: {name} 146 | Personality: {personality} 147 | User notes: {user_interests} 148 | Wants concise summaries? {concise_summaries} 149 | 150 | You will be given a dump of information for the time period "{time_period}" fetched from our database that we have found to be relevant to the user: some trending keywords and some top keywords, 151 | along with data we have already aggregated to surface what people are saying and the posts that have been shared, with the source numbers for each one. 152 | 153 | Your job is to synthesize all this information into a {time_period} report so the user can get a grasp on what is happening. 154 | Get to the point. 155 | Don’t repeat the dataset—extract patterns, second-order effects, and contrarian takes. 156 | 157 | If you need help: you can first identify 3–5 cross-cutting themes across items, explain “so what” based on the user profile, and pull in what people are discussing (consensus vs skepticism) and why it matters. Build out a story that is easy to follow. 158 | 159 | You may decide to ignore noise that you don't think will be useful for the user profile and would only cause information overload. 160 | 161 | Build a report for the user in no more than 3-4 paragraphs with around 1300 to 1800 characters for the short summary, and 5-7 paragraphs with less than 6000 characters for the long summary. 162 | For each paragraph title, use bold **title:** formatting. 163 | End each report with a few notes in one short paragraph on what this means for them and what to look out for (seeing as you see more information than they do). 164 | For the title, pick a very short phrase of 2-3 words. 165 | 166 | Start the report with one or two sentences on what it is about, and name the user so they know it's their report. 167 | 168 | Remember to keep it to what you think the user will be interested in; never generalize. 169 | 170 | Make sure to keep the citations exactly as is, in [n:n] format (ex. [1:12]), with each fact you use, as we will parse those later. 171 | """ 172 | 173 | class Theme(BaseModel): 174 | title: str 175 | relevance: int 176 | key_points: List[str] 177 | supporting_keywords: List[str] 178 | 179 | class AnalysisResponse(BaseModel): 180 | themes: List[Theme] 181 | overall_focus: str 182 | user_priority_reasoning: str 183 | 184 | PROMPT_ANALYSIS_SYSTEM = """ 185 | Your job is to synthesize data we picked up on tech forums, blogs and social media to identify the most relevant themes for a personalized report on what is going on. 186 | You should cut out noise and identify information important for the user while keeping it entertaining. 187 | 188 | Your task: 189 | 1. Identify 5–7 cross-cutting themes across the data. They should include what people are discussing (consensus vs skepticism).
They should be relevant to the user's profile and to the data itself (don't ignore major happenings). The themes should be about extracting patterns, second-order effects, and contrarian takes. 190 | 2. Rank each theme by relevance to the user (1-10 scale, 10 being most relevant) 191 | 3. For each theme, provide: 192 | - "title": A clear title (2-4 words) 193 | - "relevance": relevance score based on user profile 194 | - "key_points": 5-7 key points that should be covered, with citations kept in the exact format [n:n]. 195 | - "supporting_keywords": keywords from the data that support this theme. 196 | 197 | Don't: 198 | Don’t repeat the dataset as-is or focus on only part of the data. 199 | 200 | Do: 201 | Take in all the data and then decide what is most important. 202 | 203 | Consider the user's: 204 | - Name: {name} 205 | - Personality: {personality} 206 | - Interests: {user_interests} 207 | - Time period preference: {time_period} 208 | 209 | Remember, if they are non-technical or semi-technical, you should not include keywords they won't understand. 210 | Keywords like Kubernetes, Proxmox, and maybe even Docker are not for non-technical people unless they ask for them specifically (i.e. they work in this domain). 211 | Be smart about what you decide to include based on what you think they already know. 212 | 213 | Focus on themes that would be most valuable and interesting to this specific user. 214 | """ 215 | 216 | 217 | PROMPT_THEME_SUMMARY_SYSTEM = """ 218 | Your job is to build a personalized news synthesis based on pre-identified themes for a person with this profile: 219 | 220 | Name: {name} 221 | Personality: {personality} 222 | User notes: {user_interests} 223 | Wants concise summaries? {concise_summaries} 224 | 225 | You will receive: 226 | 1. A theme analysis with ranked themes and key points 227 | 2. Full keyword data for the time period "{time_period}" 228 | 229 | Your task is to write focused, synthesized reports that cover the identified themes in order of relevance. Use data from the key points and the full dataset to build your answer. 230 | Get to the point but don't overload the user with information. 231 | Don't repeat the dataset—extract patterns, second-order effects, and contrarian takes. 232 | 233 | What you should create: 234 | - Short summary: 3-4 body paragraphs with up to 3 themes, 1400-2000 characters 235 | - Long summary: 5-7 body paragraphs with up to 6 themes, less than 7000 characters 236 | - Title: 2-3 words maximum 237 | 238 | Always do this when building the summaries: 239 | - Start the report with one or two sentences in a small introduction paragraph about what it covers, and name the user to make it personal. 240 | - Use bold **title:** formatting for each paragraph title. 241 | - End with a short paragraph on what this means for them and what to look out for. 242 | 243 | Do not: 244 | - Overload the user with information so it becomes incomprehensible. 245 | - Present the data as facts; it is what people are saying on social media, blogs and tech forums. 246 | - Add in themes or information that the user may not be interested in. 247 | 248 | Build around the themes provided; never repeat the data. Instead, focus on the themes and explain "so what" based on the user profile. 249 | Pull in what people are discussing (consensus vs skepticism) and why it matters for this specific user. 250 | Focus only on what the user will find interesting based on the theme analysis. Never generalize.
251 | 252 | Keep citations exactly as provided, in [n:n] format (ex. [1:12]), as we will parse them later. 253 | """ --------------------------------------------------------------------------------
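Note: the prompts above instruct the model to keep citation markers in [n:n] form (e.g. [1:12]) so they can be parsed downstream, presumably by helpers/functions/process_citations.py and helpers/functions/renumber_citations.py, whose implementations are not reproduced above. As a rough illustration only, a minimal sketch of pulling such markers out of a summary string could look like the code below; the helper name, the regex, and the reading of the two numbers as a keyword number plus a citation number are assumptions, not the project's actual implementation.

# Illustrative sketch only: extract [n:n] citation markers from an LLM summary.
# The real parsing lives in helpers/functions/process_citations.py; this regex,
# the function name, and the (keyword_number, citation_number) interpretation
# are assumptions for demonstration purposes.
import re
from typing import List, Tuple

CITATION_PATTERN = re.compile(r"\[(\d+):(\d+)\]")

def extract_citation_markers(summary: str) -> List[Tuple[int, int]]:
    """Return (keyword_number, citation_number) pairs in order of appearance."""
    return [(int(k), int(c)) for k, c in CITATION_PATTERN.findall(summary)]

if __name__ == "__main__":
    text = "GPU demand keeps climbing [1:12], though some expect a cooldown [2:3]."
    print(extract_citation_markers(text))  # -> [(1, 12), (2, 3)]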