├── db_handler
│   ├── sample_vault
│   │   ├── recipients.csv
│   │   ├── secrets.ini
│   │   └── links.py
│   ├── __init__.py
│   ├── models.py
│   ├── db.py
│   └── dynamo.py
├── .gitignore
├── services
│   ├── crawler
│   │   ├── blog_crawler.py
│   │   ├── social_media_crawler.py
│   │   ├── __init__.py
│   │   └── rss_crawler.py
│   ├── apps
│   │   ├── or_service.py
│   │   ├── __init__.py
│   │   ├── kg_service.py
│   │   ├── hf_service.py
│   │   ├── gh_service.py
│   │   ├── ph_service.py
│   │   └── arx_service.py
│   ├── __init__.py
│   ├── competition_service.py
│   ├── product_service.py
│   ├── research_service.py
│   ├── email_service.py
│   ├── news_service.py
│   └── event_service.py
├── .dockerignore
├── launch.py
├── requirements.txt
├── static
│   ├── logo.svg
│   ├── favicon.svg
│   ├── newsletter.html
│   └── style.css
├── LICENSE
├── Dockerfile
├── utils
│   ├── auth_utility.py
│   └── utility.py
├── .github
│   └── workflows
│       └── deploy.yml
├── app
│   └── main.py
├── README.md
├── CODE_OF_CONDUCT.md
└── router
    └── routes.py
/db_handler/sample_vault/recipients.csv:
--------------------------------------------------------------------------------
1 | email
2 | add-your-email@test.com
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /test.py
2 | /static/generated_newsletter.html
3 | /db_handler/vault/
4 | /sync-vault*.sh
5 |
--------------------------------------------------------------------------------
/services/crawler/blog_crawler.py:
--------------------------------------------------------------------------------
1 | class SubstackCrawler:
2 | pass
3 |
4 | class MediumCrawler:
5 | pass
--------------------------------------------------------------------------------
/services/crawler/social_media_crawler.py:
--------------------------------------------------------------------------------
1 | class TwitterCrawler:
2 | pass
3 |
4 | class LinkedinCrawler:
5 | pass
--------------------------------------------------------------------------------
/db_handler/__init__.py:
--------------------------------------------------------------------------------
1 | from db_handler.db import *
2 | from db_handler.models import *
3 | from db_handler.dynamo import Dynamo
4 | from db_handler.vault.links import rss_feed, sites
--------------------------------------------------------------------------------
/services/apps/or_service.py:
--------------------------------------------------------------------------------
1 | class OpenReviewScanner:
2 | def __init__(self, top_n: int = 5):
3 | self.top_n = top_n
4 |
5 | def get_top_n_papers(self):
6 | pass
--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
1 | __pycache__
2 | *.pyc
3 | *.pyo
4 | *.pyd
5 | .Python
6 | env/
7 | venv/
8 | .env
9 | *.log
10 | .git
11 | .gitignore
12 | .pytest_cache/
13 | .coverage
14 | htmlcov/
15 | .DS_Store
16 | test.py
17 | *.sqlite
--------------------------------------------------------------------------------
/launch.py:
--------------------------------------------------------------------------------
1 | from flask import Flask
2 | from router.routes import bp, limiter
3 | import os
4 |
5 | app = Flask(__name__)
6 |
7 | limiter.init_app(app)
8 | app.register_blueprint(bp)
9 |
10 |
11 | if __name__ == "__main__":
12 | port = int(os.environ.get("PORT", 5000))
13 |     app.run(host="0.0.0.0", port=port, debug=os.environ.get("FLASK_ENV", "development") == "development")
14 |
--------------------------------------------------------------------------------
/services/crawler/__init__.py:
--------------------------------------------------------------------------------
1 | from services.crawler.rss_crawler import *
2 | from services.crawler.blog_crawler import SubstackCrawler, MediumCrawler
3 | from services.crawler.social_media_crawler import LinkedinCrawler, TwitterCrawler
4 |
5 | __all__ = [
6 | "SubstackCrawler",
7 | "MediumCrawler",
8 | "LinkedinCrawler",
9 | "TwitterCrawler"
10 | ]
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | substack-api==1.1.1
2 | kaggle==1.7.4.5
3 | pydantic==2.11.9
4 | pandas
5 | requests==2.32.5
6 | simplejson==3.20.2
7 | botocore==1.40.44
8 | schedule==1.2.2
9 | PyJWT
10 | cryptography==46.0.2
11 | beautifulsoup4==4.14.2
12 | numpy
13 | scikit-learn
14 | feedparser==6.0.12
15 | pytz==2025.2
16 | Flask
17 | Flask-Cors==6.0.1
18 | Flask-Limiter
19 | uvicorn
20 | sqlitedict==2.1.0
21 | sendgrid==7.0.0rc2
22 | boto3==1.40.44
23 |
--------------------------------------------------------------------------------
/services/apps/__init__.py:
--------------------------------------------------------------------------------
1 | from services.apps.arx_service import ArxivScanner
2 | from services.apps.gh_service import GitHubScanner
3 | from services.apps.hf_service import HuggingFaceScanner
4 | from services.apps.kg_service import KaggleScanner
5 | from services.apps.or_service import OpenReviewScanner
6 | from services.apps.ph_service import ProductHuntScanner
7 |
8 | __all__ = [
9 | "ArxivScanner",
10 | "GitHubScanner",
11 | "HuggingFaceScanner",
12 | "KaggleScanner",
13 | "OpenReviewScanner",
14 | "ProductHuntScanner"
15 | ]
--------------------------------------------------------------------------------
/db_handler/sample_vault/secrets.ini:
--------------------------------------------------------------------------------
1 | [default]
2 | brand_name = "AiLert"
3 |
4 | [HuggingFace]
5 | # token = add huggingface token and uncomment this line
6 |
7 | [Kaggle]
8 | # path = add kaggle credential file path here and uncomment
9 |
10 |
11 | [Dynamo]
12 | # region = us-east-1
13 |
14 | [Arxiv]
15 | # q = cat:cs.CV+OR+cat:cs.LG+OR+cat:cs.CL+OR+cat:cs.AI+OR+cat:cs.NE+OR+cat:cs.RO
16 |
17 | [Sendgrid]
18 | # api_key = add sendgrid api key and uncomment
19 |
20 | [JWT]
21 | # user_id = test
22 | # token = generate a random token that your apis will accept
--------------------------------------------------------------------------------
/static/logo.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/services/__init__.py:
--------------------------------------------------------------------------------
1 | from services.news_service import NewsService
2 | from services.event_service import EventsService
3 | from services.research_service import ResearchService
4 | from services.apps.gh_service import GitHubScanner
5 | from services.competition_service import CompetitionService
6 | from services.product_service import ProductService
7 | from services.email_service import EmailService
8 |
9 |
10 | __all__ = [
11 | "NewsService",
12 | "GitHubScanner",
13 | "CompetitionService",
14 | "EventsService",
15 | "ResearchService",
16 | "ProductService",
17 | "EmailService"
18 | ]
--------------------------------------------------------------------------------
/services/competition_service.py:
--------------------------------------------------------------------------------
1 | from db_handler import Competitions
2 | from services.apps import KaggleScanner
3 |
4 | class CompetitionService:
5 | def __init__(self):
6 | self.kaggle = KaggleScanner()
7 | self.competitions = []
8 |
9 | async def get_latest_competitions(self):
10 | kaggle = self.kaggle.get_new_competitions_launch()
11 | self.competitions.extend([Competitions(
12 | name = comp["name"],
13 | link = comp["link"],
14 | deadline = comp["deadline"],
15 | reward = comp["reward"]
16 | ) for comp in kaggle])
17 |
18 | return self.competitions
--------------------------------------------------------------------------------
/services/product_service.py:
--------------------------------------------------------------------------------
1 | from db_handler import Products, sites
2 | from services.apps import HuggingFaceScanner, ProductHuntScanner
3 |
4 | class ProductService:
5 | def __init__(self):
6 | self.hf_scanner = HuggingFaceScanner(sites["hf_base_url"],1)
7 | self.ph_scanner = ProductHuntScanner(sites["ph_site_url"], sites["ph_url"],1)
8 | self.products = []
9 |
10 | async def get_latest_products(self):
11 | hf_products = self.hf_scanner.weekly_scanner()
12 | ph_products = None #self.ph_scanner.get_last_week_top_products()
13 | final_dict = hf_products #+ ph_products
14 | for key, items in final_dict.items():
15 | for item in items:
16 | self.products.append(Products(
17 | name = item["title"],
18 | link = item["link"],
19 | summary = item["summary"],
20 | source = item["source"],
21 | engagement = item["engagement"]
22 | ))
23 | return self.products
24 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2025 Anuj Gupta
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | # Use Python 3.13 slim image as base
2 | FROM python:3.13-slim
3 |
4 | # Set working directory
5 | WORKDIR /app
6 |
7 | # Set environment variables
8 | ENV PYTHONDONTWRITEBYTECODE=1 \
9 | PYTHONUNBUFFERED=1 \
10 | FLASK_APP=launch.py \
11 | FLASK_ENV=production
12 |
13 | # Install system dependencies
14 | RUN apt-get update && apt-get install -y --no-install-recommends \
15 | build-essential \
16 | libpq-dev \
17 | && rm -rf /var/lib/apt/lists/*
18 |
19 | # Create non-root user for security
20 | RUN adduser --disabled-password --gecos '' appuser
21 |
22 | # Copy requirements first to leverage Docker cache
23 | COPY requirements.txt .
24 |
25 | # Install Python dependencies
26 | RUN pip install --no-cache-dir -r requirements.txt
27 |
28 | # Copy the rest of the application
29 | COPY . .
30 |
31 | # Create vault directory
32 | RUN mkdir -p /app/db_handler/vault && \
33 | chown -R appuser:appuser /app
34 |
35 | # Switch to non-root user
36 | USER appuser
37 |
38 | # Create volume for vault
39 | VOLUME ["/app/db_handler/vault"]
40 |
41 | # Expose port
42 | EXPOSE 5000
43 |
44 | # Command to run the application
45 | CMD ["python", "launch.py"]
--------------------------------------------------------------------------------
/static/favicon.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/utils/auth_utility.py:
--------------------------------------------------------------------------------
1 | import logging
2 |
3 | import jwt
4 | import configparser
5 | from functools import wraps
6 | from flask import request, jsonify
7 | from datetime import datetime, timedelta, timezone
8 |
9 | config = configparser.ConfigParser()
10 | config.read('db_handler/vault/secrets.ini')
11 | JWT_SECRET_KEY = config["JWT"]["token"]
12 |
13 |
14 | def create_token(user_id):
15 | payload = {
16 |         'exp': datetime.now(timezone.utc) + timedelta(days=1),  # PyJWT treats naive datetimes as UTC, so be explicit
17 | 'sub': user_id
18 | }
19 |
20 | token = jwt.encode(
21 | payload,
22 | JWT_SECRET_KEY,
23 | algorithm='HS256'
24 | )
25 | return token
26 |
27 |
28 | def token_required(f):
29 | @wraps(f)
30 | def decorated(*args, **kwargs):
31 | token = None
32 |
33 | # Check for token in headers
34 | if 'Authorization' in request.headers:
35 |             token = request.headers['Authorization'].split(" ")[-1]  # [-1] tolerates a bare token without the "Bearer " prefix
36 |
37 | if not token:
38 | return jsonify({
39 | 'message': 'Token is missing',
40 | 'status': 'error'
41 | }), 401
42 |
43 | try:
44 | # Decode token
45 | data = jwt.decode(token, JWT_SECRET_KEY, algorithms=["HS256"])
46 | current_user = data['sub']
47 | except Exception as e:
48 | logging.info("Token error" + str(e))
49 | return jsonify({
50 | 'message': 'Token is invalid',
51 | 'status': 'error'
52 | }), 401
53 |
54 | return f(*args, **kwargs)
55 |
56 | return decorated
57 |
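58 | # Example (sketch): protecting a Flask route with token_required. The blueprint
59 | # and route names below are illustrative, not part of this module.
60 | #
61 | #     from flask import Blueprint
62 | #     bp = Blueprint("api", __name__)
63 | #
64 | #     @bp.route("/newsletter")
65 | #     @token_required
66 | #     def get_newsletter():
67 | #         return {"status": "ok"}
68 | #
69 | # Clients then send:  Authorization: Bearer <token from create_token(user_id)>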
--------------------------------------------------------------------------------
/db_handler/models.py:
--------------------------------------------------------------------------------
1 | from enum import Enum
2 | from pydantic import BaseModel
3 | from typing import List, Optional
4 |
5 | class TaskType(Enum):
6 | DAILY = "daily"
7 | WEEKLY = "weekly"
8 |
9 | class SchedulerState(Enum):
10 | RUNNING = "running"
11 | PAUSED = "paused"
12 | STOPPED = "stopped"
13 |
14 | class NewsItem(BaseModel):
15 | title: str
16 | description: str
17 | link: str
18 | read_time: int
19 | source: Optional[str] = None
20 | engagement: Optional[str] = None
21 | additional_info: Optional[dict] = None
22 |
23 | class Competitions(BaseModel):
24 | name: str
25 | link: str
26 | deadline: str
27 | reward: str
28 |
29 | class Repo(BaseModel):
30 | name: str
31 | link: str
32 | summary: str
33 | source: Optional[str] = None
34 | engagement: Optional[str] = None
35 |
36 | class Products(BaseModel):
37 | name: str
38 | link: str
39 | summary: str
40 | source: Optional[str] = None
41 | engagement: Optional[str] = None
42 |
43 | class Event(BaseModel):
44 | title: str
45 | date: str
46 | location: str
47 | description: str
48 |
49 | class ResearchPaper(BaseModel):
50 | title: str
51 | authors: List[str]
52 | abstract: str
53 | publication: str
54 | link: str
55 | date: str
56 | engagement: Optional[str] = None
57 |
58 | class NewsletterContent(BaseModel):
59 | # model_config = dict(arbitrary_types_allowed=True)
60 | highlights: List[dict] | None = None
61 | breaking_news: List[NewsItem] | None = None
62 | research_papers: List[ResearchPaper] | None = None
63 | latest_competitions: List[Competitions] | None = None
64 | top_products: List[Products] | None = None
65 | github_trending: List[Repo] | None = None
66 | upcoming_events: List[Event] | None = None
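67 | 
68 | # Example (sketch): these are plain pydantic models, validated on construction, e.g.
69 | #     NewsItem(title="t", description="d", link="https://example.com", read_time=3)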
--------------------------------------------------------------------------------
/services/apps/kg_service.py:
--------------------------------------------------------------------------------
1 | import configparser
2 | import os
3 | import subprocess
4 |
5 | config = configparser.ConfigParser()
6 | config.read('db_handler/vault/secrets.ini')
7 |
8 | default_cred = config["Kaggle"]["path"]
9 |
10 | class KaggleScanner:
11 | def __init__(self, base_url: str = "", top_n=5, kaggle_cred_path=default_cred):
12 | self.base_url = base_url
13 | self.top_n = top_n
14 | self.kaggle_cred_path = kaggle_cred_path
15 | self.response = []
16 |
17 | def _get_top_n_kaggle_competitions(self):
18 | try:
19 | os.environ["KAGGLE_CONFIG_DIR"] = os.path.expanduser(self.kaggle_cred_path)
20 | result = subprocess.run(
21 | ["kaggle", "competitions", "list", "--sort-by", "prize"],
22 |                 capture_output=True,  # capture stderr too; result.stderr was None before
23 |                 text=True
24 | )
25 | if result.returncode != 0:
26 | print("Error fetching Kaggle competitions:", result.stderr)
27 |                 return []  # empty list keeps callers working instead of handing them None
28 |
29 | lines = result.stdout.strip().split("\n")
30 | data_rows = [line for line in lines if "https://www.kaggle.com" in line]
31 | response = []
32 |
33 | for row in data_rows[:self.top_n]:
34 | columns = row.split()
35 | if len(columns) > 0:
36 | competition_link = columns[0]
37 | deadline = columns[1]
38 | reward = columns[4]
39 |
40 | competition_name = competition_link.split("/")[-1]
41 |
42 | response.append({
43 | "name": competition_name,
44 | "link": competition_link,
45 | "deadline": deadline,
46 | "reward": reward
47 | })
48 | return response
49 | except Exception as e:
50 | print(f"Error: {e}")
51 |
52 | def get_new_competitions_launch(self):
53 | self.response = self._get_top_n_kaggle_competitions()
54 | return self.response
55 |
--------------------------------------------------------------------------------
/services/apps/hf_service.py:
--------------------------------------------------------------------------------
1 | import configparser
2 |
3 | import requests
4 |
5 | config = configparser.ConfigParser()
6 | config.read('db_handler/vault/secrets.ini')
7 |
8 | default_token = config["HuggingFace"]["token"]
9 |
10 | class HuggingFaceScanner:
11 | def __init__(self, base_url, top_n=5, auth_token=default_token):
12 | self.base_url = base_url
13 | self.top_n = top_n
14 | self.auth_token = "Bearer "+auth_token
15 | self.response = {}
16 |
17 | def _top_models(self, top_n):
18 | url = self.base_url+"/api/models"
19 | response = requests.get(
20 | url, params={"limit": top_n, "full": "True", "config": "False"},
21 | headers={"Authorization":self.auth_token}
22 | )
23 | return [{"title":model["modelId"],
24 | "link":self.base_url+model["id"],
25 | "summary": model["author"],
26 | "source":"HuggingFace",
27 | "engagement": str(model["trendingScore"])}for model in response.json()]
28 |
29 | def _top_datasets(self, top_n):
30 | url = self.base_url+"/api/datasets"
31 | response = requests.get(
32 | url, params={"limit": top_n, "full": "False"},
33 | headers={"Authorization":self.auth_token}
34 | )
35 | return [{"title": dataset["id"],
36 | "link": self.base_url + dataset["id"],
37 | "summary": dataset["author"],
38 | "source": "HuggingFace",
39 | "engagement": str(dataset["trendingScore"])} for dataset in response.json()]
40 |
41 | def _top_apps(self, top_n):
42 | url = self.base_url+"/api/spaces"
43 | response = requests.get(
44 | url, params={"limit": top_n, "full": "True"},
45 | headers={"Authorization":self.auth_token}
46 | )
47 | return [{"title": apps["id"],
48 | "link": self.base_url + apps["id"],
49 | "summary": apps["author"],
50 | "source": "HuggingFace",
51 | "engagement": str(apps["trendingScore"])} for apps in response.json()]
52 |
53 | def weekly_scanner(self):
54 | self.response["top_models"] = self._top_models(self.top_n)
55 | self.response["top_datasets"] = self._top_datasets(self.top_n)
56 | self.response["top_apps"] = self._top_apps(self.top_n)
57 | return self.response
--------------------------------------------------------------------------------
/services/research_service.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import configparser
3 | from sklearn import svm
4 | from db_handler import sites
5 | from typing import List, Dict
6 |
7 | from db_handler import ResearchPaper
8 | from services.apps import ArxivScanner
9 | from services.apps import OpenReviewScanner
10 | from sklearn.feature_extraction.text import TfidfVectorizer
11 |
12 |
13 | config = configparser.ConfigParser()
14 | config.read('db_handler/vault/secrets.ini')
15 |
16 | class ResearchService:
17 | def __init__(self, top_n:int = 3):
18 | self.top_n = top_n
19 |         self.arxiv = ArxivScanner(sites["arxiv_url"], top_n=top_n)
20 | self.open_review = OpenReviewScanner(top_n=top_n)
21 | self.top_papers = []
22 |
23 | def _rerank(self, arxiv_papers: List[Dict], open_papers: List[Dict]) -> List[Dict]:
24 | all_papers = arxiv_papers + open_papers
25 | texts = [f"{p['title']} {p['abstract']} {' '.join(p['authors'])}" for p in all_papers]
26 |
27 | vectorizer = TfidfVectorizer(
28 | max_features=5000,
29 | stop_words='english',
30 | ngram_range=(1, 2)
31 | )
32 | x = vectorizer.fit_transform(texts)
33 | y = np.zeros(len(all_papers))
34 | for i, paper in enumerate(all_papers):
35 | score = float(paper.get('score', 0))
36 | citations = float(paper.get('citations', 0))
37 | y[i] = score + 0.1 * citations
38 |
39 | if y.max() > y.min():
40 | y = (y - y.min()) / (y.max() - y.min())
41 |
42 | clf = svm.LinearSVC(
43 | class_weight='balanced',
44 | max_iter=1000,
45 | dual=False
46 | )
47 | clf.fit(x, y > np.median(y))
48 | scores = clf.decision_function(x)
49 | scored_papers = [(paper, score) for paper, score in zip(all_papers, scores)]
50 | reranked = sorted(scored_papers, key=lambda x: x[1], reverse=True)
51 | return [paper for paper, _ in reranked[:self.top_n]]
52 |
53 | async def get_latest_papers(self):
54 | search_query = config["Arxiv"]["q"]
55 | arxiv_papers = self.arxiv.get_top_n_papers(search_query=search_query)
56 | open_r_papers = self.open_review.get_top_n_papers()
57 | reranked_papers = self._rerank(arxiv_papers, open_r_papers)
58 | self.top_papers.extend(ResearchPaper(
59 | title = paper["title"],
60 | abstract= paper["abstract"],
61 | authors = paper["authors"],
62 | publication = paper["publication"],
63 | date = paper["_time_str"],
64 | link = paper["url"],
65 | engagement = "") for paper in reranked_papers)
66 | return self.top_papers
67 |
--------------------------------------------------------------------------------
/.github/workflows/deploy.yml:
--------------------------------------------------------------------------------
1 | name: Deploy to EC2
2 |
3 | on:
4 | pull_request:
5 | types: [closed]
6 | branches: [ main ]
7 | workflow_dispatch:
8 |
9 | env:
10 | DOCKER_IMAGE_TAG: ${{ github.sha }}
11 |
12 | jobs:
13 | deploy:
14 | runs-on: ubuntu-latest
15 |     if: github.event_name == 'workflow_dispatch' || github.event.pull_request.merged == true
16 |
17 | steps:
18 | - name: Checkout Repository
19 | uses: actions/checkout@v4
20 |
21 | - name: Configure AWS Credentials
22 | uses: aws-actions/configure-aws-credentials@v4
23 | with:
24 | aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
25 | aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
26 | aws-region: ${{ secrets.AWS_REGION }}
27 |
28 | - name: Setup SSH and Deploy
29 | run: |
30 | # Setup SSH
31 | mkdir -p ~/.ssh
32 | echo "${{ secrets.EC2_SSH_KEY }}" > ~/.ssh/id_rsa
33 | chmod 600 ~/.ssh/id_rsa
34 | ssh-keyscan -H ${{ secrets.EC2_HOST }} >> ~/.ssh/known_hosts
35 |
36 | # Deploy to EC2
37 | ssh ${{ secrets.EC2_USER }}@${{ secrets.EC2_HOST }} "
38 | # Export AWS credentials
39 | export AWS_ACCESS_KEY_ID='${{ secrets.AWS_ACCESS_KEY_ID }}'
40 | export AWS_SECRET_ACCESS_KEY='${{ secrets.AWS_SECRET_ACCESS_KEY }}'
41 | export AWS_REGION='${{ secrets.AWS_REGION }}'
42 |
43 | # Create and setup directories
44 | sudo mkdir -p /data/newsletter/vault
45 | sudo chown -R ${{ secrets.EC2_USER }}:${{ secrets.EC2_USER }} /data/newsletter
46 |
47 | # Sync S3
48 | aws s3 sync s3://${{ secrets.S3_CONFIG_BUCKET }}/vault/ /data/newsletter/vault/
49 |
50 | # Deploy with Docker
51 | sudo docker build -t ailert-newsletter:${{ env.DOCKER_IMAGE_TAG }} https://github.com/${{ github.repository }}.git#${{ github.ref }}
52 |
53 | sudo docker stop ailert-newsletter || true
54 | sudo docker rm ailert-newsletter || true
55 |
56 | sudo docker run -d \
57 | --name ailert-newsletter \
58 | -p 5000:5000 \
59 | -v /data/newsletter/vault:/app/db_handler/vault \
60 | --restart unless-stopped \
61 | -e AWS_ACCESS_KEY_ID='${{ secrets.AWS_ACCESS_KEY_ID }}' \
62 | -e AWS_SECRET_ACCESS_KEY='${{ secrets.AWS_SECRET_ACCESS_KEY }}' \
63 | -e AWS_REGION='${{ secrets.AWS_REGION }}' \
64 | -e SMTP_USERNAME='${{ secrets.SMTP_USERNAME }}' \
65 | -e SMTP_PASSWORD='${{ secrets.SMTP_PASSWORD }}' \
66 | -e JWT_SECRET='${{ secrets.JWT_SECRET }}' \
67 | ailert-newsletter:${{ env.DOCKER_IMAGE_TAG }}
68 |
69 | sudo docker system prune -f --volumes
70 | "
71 |
72 | - name: Cleanup
73 | if: always()
74 | run: rm -f ~/.ssh/id_rsa
--------------------------------------------------------------------------------
/static/newsletter.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | AiLert Weekly Newsletter
7 |
59 |
60 |
61 |
62 |
63 |
64 |
68 |
69 |
79 |
80 | {{content}}
81 |
82 |
91 |
92 |
93 |
--------------------------------------------------------------------------------
/services/apps/gh_service.py:
--------------------------------------------------------------------------------
1 | import jwt
2 | import time
3 | import requests
4 | import configparser
5 | from db_handler import Repo
6 | from bs4 import BeautifulSoup
7 |
8 | config = configparser.ConfigParser()
9 | config.read('db_handler/vault/secrets.ini')
10 |
11 | default_pem = config["GitHub"]["pem_path"]
12 | default_clientId = config["GitHub"]["client_id"]
13 |
14 | class GitHubScanner:
15 | def __init__(self, site_url, ftype, top_n=5, pem_path=default_pem, client_id=default_clientId):
16 | self.site_url = site_url
17 | self.ftype = ftype
18 | self.top_n = top_n
19 | self.pem_path = pem_path
20 | self.client_id = client_id
21 | self.response = []
22 |
23 | def _gh_authenticate(self):
24 | with open(self.pem_path, 'rb') as pem_file:
25 | signing_key = pem_file.read()
26 |
27 | payload = {
28 | 'iat': int(time.time()),
29 | 'exp': int(time.time()) + 600,
30 | 'iss': self.client_id
31 | }
32 |
33 | encoded_jwt = jwt.encode(payload, signing_key, algorithm='RS256')
34 | return encoded_jwt
35 |
36 | def _extract_from_html(self, link):
37 | repos = []
38 | try:
39 | response = requests.get(link)
40 | response.raise_for_status()
41 | soup = BeautifulSoup(response.text, 'html.parser')
42 | repo_list = soup.find_all('article', class_='Box-row')
43 |
44 | for repo in repo_list:
45 | name = repo.find('h2', class_='h3').text.strip().replace('\n', '').replace(' ', '')
46 |
47 | description = repo.find('p', class_='col-9 color-fg-muted my-1 pr-4')
48 | description = description.text.strip() if description else "No description provided."
49 |
50 | stars_element = repo.find('a', class_='Link Link--muted d-inline-block mr-3') or \
51 | repo.find('a', class_='Link--muted d-inline-block mr-3')
52 | stars = stars_element.text.strip().replace(',', '') if stars_element else "0"
53 |
54 | fork_elements = repo.find_all('a', class_='Link Link--muted d-inline-block mr-3') or \
55 | repo.find_all('a', class_='Link--muted d-inline-block mr-3')
56 | forks = fork_elements[1].text.strip().replace(',', '') if len(fork_elements) > 1 else "0"
57 |
58 | repos.append({
59 | 'name': name,
60 | 'description': description,
61 | 'stars': str(stars),
62 | 'forks': str(forks)
63 | })
64 |
65 | return repos[:self.top_n]
66 |         except Exception as e:
67 |             print(f"Error: {str(e)}")
68 |             return []  # keep callers iterating over a list instead of crashing on None
69 | def _daily_trending_repos(self):
70 | repositories = self._extract_from_html(self.site_url)
71 | return repositories
72 |
73 | def _weekly_trending_repos(self):
74 | repositories = self._extract_from_html(self.site_url)
75 | return repositories
76 |
77 | async def get_trending_repos(self):
78 | if self.ftype == "daily":
79 | repositories = self._daily_trending_repos()
80 | else:
81 | repositories = self._weekly_trending_repos()
82 | self.response.extend(Repo(
83 | name = repo["name"],
84 | link = "",
85 | summary = repo["description"],
86 | source = "GitHub",
87 | engagement = repo["stars"]) for repo in repositories)
88 | return self.response
--------------------------------------------------------------------------------
/services/apps/ph_service.py:
--------------------------------------------------------------------------------
1 | import requests
2 | from bs4 import BeautifulSoup
3 | from datetime import datetime, timedelta
4 |
5 | class ProductHuntScanner:
6 | def __init__(self, site_url, graph_url, top_n=5):
7 | self.site_url = site_url
8 | self.graph_url = graph_url
9 | self.top_n = top_n
10 | self.response = []
11 |
12 | def get_last_week_top_products(self):
13 | headers = {
14 | "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
15 | }
16 | try:
17 | response = requests.get(self.site_url, headers=headers)
18 | response.raise_for_status()
19 | soup = BeautifulSoup(response.text, "html.parser")
20 | last_week_section = soup.find("section", string="Last Week's Top Products")
21 | if not last_week_section:
22 | print("Could not find 'Last Week's Top Products' section.")
23 | return []
24 |
25 | products = []
26 | for product in last_week_section.find_all("li"):
27 | title = product.find("h3").get_text(strip=True) if product.find("h3") else "No Title"
28 | link = product.find("a", href=True)["href"] if product.find("a", href=True) else "No Link"
29 | products.append({"title": title, "link": f"{self.site_url}{link}"})
30 |
31 | return products
32 | except Exception as e:
33 | print(f"Error fetching data: {e}")
34 | return []
35 |
36 | def get_last_month_top_products(self, api_key):
37 | query = """
38 | query ($dateFrom: DateTime!, $dateTo: DateTime!) {
39 | posts(first: 10, postedAfter: $dateFrom, postedBefore: $dateTo, order: VOTES_COUNT) {
40 | edges {
41 | node {
42 | id
43 | name
44 | tagline
45 | url
46 | votesCount
47 | }
48 | }
49 | }
50 | }
51 | """
52 | today = datetime.utcnow()
53 | first_day_of_this_month = datetime(today.year, today.month, 1)
54 | last_day_of_last_month = first_day_of_this_month - timedelta(days=1)
55 | first_day_of_last_month = datetime(last_day_of_last_month.year, last_day_of_last_month.month, 1)
56 |
57 | variables = {
58 | "dateFrom": first_day_of_last_month.isoformat(),
59 | "dateTo": last_day_of_last_month.isoformat()
60 | }
61 |
62 | # Set headers with API key
63 | headers = {
64 | "Authorization": f"Bearer {api_key}",
65 | "Content-Type": "application/json"
66 | }
67 |
68 | try:
69 | response = requests.post(self.graph_url, json={"query": query, "variables": variables}, headers=headers)
70 | response.raise_for_status()
71 | data = response.json()
72 |
73 | products = data.get("data", {}).get("posts", {}).get("edges", [])
74 | if not products:
75 | print("No products found for last month.")
76 | return []
77 |
78 | result = []
79 | for product in products:
80 | node = product["node"]
81 | result.append({
82 | "title": node["name"],
83 | "summary": node["tagline"],
84 | "link": node["url"],
85 | "engagement": node["votesCount"],
86 | "source": "Product Hunt"
87 | })
88 |
89 | return result
90 | except Exception as e:
91 | print(f"Error fetching data: {e}")
92 | return []
93 |
--------------------------------------------------------------------------------
/services/crawler/rss_crawler.py:
--------------------------------------------------------------------------------
1 | import pytz
2 | import html
3 | import feedparser
4 | from datetime import datetime
5 |
6 | import requests
7 | import xml.etree.ElementTree as et
8 | from urllib.parse import urlparse
9 |
10 |
11 | def is_rss_feed(url):
12 | try:
13 | parsed_url = urlparse(url)
14 | if not all([parsed_url.scheme, parsed_url.netloc]):
15 | return False
16 |
17 | headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'}
18 | response = requests.get(url, headers=headers, timeout=10)
19 | response.raise_for_status()
20 |
21 | content_type = response.headers.get('content-type', '').lower()
22 | if not any(valid_type in content_type for valid_type in ['application/rss+xml', 'application/xml', 'text/xml']):
23 | return False
24 |
25 | root = et.fromstring(response.content)
26 | rss_indicators = [
27 | 'rss',
28 | 'feed',
29 | 'channel',
30 | 'item',
31 | 'entry'
32 | ]
33 |
34 | if root.tag in rss_indicators:
35 | return True
36 |
37 | for child in root:
38 | if child.tag in rss_indicators:
39 | return True
40 | return False
41 | except requests.RequestException:
42 | return False
43 | except et.ParseError:
44 | return False
45 | except Exception:
46 | return False
47 |
48 | class RSSCrawler:
49 |     """Feed reader built on feedparser. (The class wrapper and its name are
50 |     assumed: the original methods took `self` but had no enclosing class.)"""
51 | 
52 |     def __init__(self, feed_url=None):
53 |         self.feed_url = feed_url
54 |         self.feed_data = None
55 | 
56 |     def load_feed(self, url):
57 |         self.feed_url = url
58 |         try:
59 |             self.feed_data = feedparser.parse(url)
60 |             return len(self.feed_data.entries) > 0
61 |         except Exception as e:
62 |             print(f"Error loading feed: {e}")
63 |             return False
64 | 
65 |     def get_feed_info(self):
66 |         if not self.feed_data:
67 |             return None
68 | 
69 |         return {
70 |             'title': self.feed_data.feed.get('title', 'No title'),
71 |             'description': self.feed_data.feed.get('description', 'No description'),
72 |             'link': self.feed_data.feed.get('link', ''),
73 |             'last_updated': self.feed_data.feed.get('updated', 'No update date')
74 |         }
75 | 
76 |     def get_entries(self, limit=None, sort_by_date=True):
77 |         if not self.feed_data:
78 |             return []
79 | 
80 |         entries = []
81 |         for entry in self.feed_data.entries:
82 |             clean_entry = {
83 |                 'title': html.unescape(entry.get('title', 'No title')),
84 |                 'link': entry.get('link', ''),
85 |                 'description': html.unescape(entry.get('description', 'No description')),
86 |                 'author': entry.get('author', 'Unknown author'),
87 |                 'published': entry.get('published', 'No publication date'),
88 |                 'updated': entry.get('updated', entry.get('published', 'No update date'))
89 |             }
90 |             try:
91 |                 date = entry.get('updated_parsed', entry.get('published_parsed'))
92 |                 # always set the key, so the sort below can't hit a KeyError
93 |                 clean_entry['timestamp'] = datetime(*date[:6], tzinfo=pytz.UTC) if date else None
94 |             except (TypeError, ValueError):
95 |                 clean_entry['timestamp'] = None
96 | 
97 |             entries.append(clean_entry)
98 |         if sort_by_date:
99 |             entries.sort(key=lambda x: x['timestamp'] if x['timestamp'] else datetime.min.replace(tzinfo=pytz.UTC),
100 |                          reverse=True)
101 |         if limit:
102 |             entries = entries[:limit]
103 | 
104 |         return entries
105 | 
106 |     def search_entries(self, keyword, case_sensitive=False):
107 |         if not self.feed_data:
108 |             return []
109 | 
110 |         matches = []
111 |         entries = self.get_entries()
112 | 
113 |         for entry in entries:
114 |             search_text = f"{entry['title']} {entry['description']}"
115 |             if not case_sensitive:
116 |                 search_text = search_text.lower()
117 |                 keyword = keyword.lower()
118 | 
119 |             if keyword in search_text:
120 |                 matches.append(entry)
121 | 
122 |         return matches
123 | 
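124 | # Example (sketch, illustrative -- nothing else in the repo imports this class by name):
125 | #
126 | #     crawler = RSSCrawler()
127 | #     if crawler.load_feed("https://example.com/feed.xml"):
128 | #         for entry in crawler.get_entries(limit=5):
129 | #             print(entry['title'], entry['link'])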
--------------------------------------------------------------------------------
/app/main.py:
--------------------------------------------------------------------------------
1 | import asyncio, time
2 | import logging
3 | import schedule
4 | import configparser
5 | import pandas as pd
6 | from utils import utility
7 | from typing import Optional
8 | from services import EmailService
9 | from threading import Thread, Event
10 | from db_handler import sites, Dynamo, TaskType
11 | from builder.builder import NewsletterBuilder
12 |
13 | logger = logging.getLogger(__name__)
14 | logging.basicConfig(level=logging.INFO)
15 |
16 | stop_event = Event()
17 | scheduler_thread: Optional[Thread] = None
18 | scheduler_state = {"is_running": False, "is_paused": False, "task_type": None}
19 |
20 | config = configparser.ConfigParser()
21 | config.read('db_handler/vault/secrets.ini')
22 | region = config["Dynamo"]["region"]
23 |
24 | dynamo = Dynamo(region)
25 |
26 | df = pd.read_csv("db_handler/vault/recipients.csv")
27 | subscribers = df['email'].tolist()
28 |
29 | def run_scheduler(task_type: str):
30 | if task_type == TaskType.WEEKLY.value:
31 |         schedule.every().monday.at("00:00").do(lambda: asyncio.run(weekly_task()))  # schedule can't await coroutines
32 | logging.info("Weekly scheduler started")
33 | else:
34 |         schedule.every().day.at("00:00").do(lambda: asyncio.run(daily_task()))  # schedule can't await coroutines
35 | logging.info("Daily scheduler started")
36 |
37 | while not stop_event.is_set():
38 | if not scheduler_state["is_paused"]:
39 | schedule.run_pending()
40 | time.sleep(1)
41 |
42 | schedule.clear()
43 | scheduler_state["is_running"] = False
44 | logging.info("Scheduler stopped")
45 |
46 |
47 | async def generate_newsletter(sections, task_type):
48 | if task_type == TaskType.WEEKLY.value:
49 | urls = sites["gh_weekly_url"]
50 | else:
51 | urls = sites["gh_daily_url"]
52 |
53 | weekly = NewsletterBuilder({
54 | "gh_url": urls,
55 | "gh_ftype": task_type},
56 | dynamo)
57 | weekly.set_sections(sections)
58 | content = await weekly.section_generator()
59 | newsletter_html = await weekly.build(content)
60 | return newsletter_html
61 |
62 |
63 | async def daily_task():
64 | daily = NewsletterBuilder({
65 | "gh_url": sites["gh_daily_url"],
66 | "gh_ftype": "daily"},
67 | dynamo)
68 | daily.set_sections(["news"])
69 | logger.info(f"starting generator")
70 | content = await daily.section_generator()
71 | logger.info(f"sections generated")
72 | newsletter_html = await daily.build(content)
73 | newsletter_html = utility.inline_css(newsletter_html, "static")
74 | newsletter_html = utility.inline_svg_images(newsletter_html, "static")
75 | logger.info("content updated")
76 | item = save_to_db(newsletter_html, "daily")
77 | logger.info(f"saved to db, sending email")
78 | await send_email(content=item["content"])
79 | logger.info(f"email sent")
80 |
81 |
82 | async def weekly_task():
83 | weekly = NewsletterBuilder({
84 | "gh_url": sites["gh_weekly_url"],
85 | "gh_ftype": "weekly"},
86 | dynamo)
87 | weekly.set_sections(["all"])
88 | logger.info(f"starting generator")
89 | content = await weekly.section_generator()
90 | logger.info(f"sections generated")
91 | newsletter_html = await weekly.build(content)
92 | logger.info(f"newsletter build complete")
93 | newsletter_html = utility.inline_css(newsletter_html, "static")
94 | newsletter_html = utility.inline_svg_images(newsletter_html, "static")
95 | logger.info("content updated")
96 | item = save_to_db(newsletter_html, "weekly")
97 | logger.info(f"saved to db, sending email")
98 | await send_email(content=item["content"])
99 | logger.info(f"email sent")
100 |
101 |
102 | def save_to_db(content, content_type):
103 | try:
104 | item = {
105 | "item_name": "newsletter",
106 | "type": content_type,
107 | "content": content,
108 | "created": utility.get_formatted_timestamp()
109 | }
110 |
111 | item_id = utility.generate_deterministic_id(item, key_fields=["item_name", "type"], prefix="nl")
112 | item["newsletterId"] = item_id
113 | dynamo.add_item("newsletter", "newsletterId", item, False)
114 | return item
115 | except Exception as e:
116 | logging.info("Error saving to dynamo db", e)
117 |
118 |
119 | async def send_email(content=None, template_id=None, recipients=subscribers):
120 | email_service = EmailService(
121 | recipients=recipients,
122 | body_text = content,
123 | template_id=template_id
124 | )
125 | result = email_service.send_email()
126 | return result
127 |
--------------------------------------------------------------------------------
/services/email_service.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import configparser
3 | from typing import List, Optional
4 | from sendgrid import SendGridAPIClient
5 | from sendgrid.helpers.mail import Mail, Content
6 |
7 |
8 | config = configparser.ConfigParser()
9 | config.read('db_handler/vault/secrets.ini')
10 | api_key = config["Sendgrid"]["api_key"]
11 |
12 | class EmailService:
13 | def __init__(self, recipients: Optional[List[str]] = None,
14 | subject: Optional[str] = None,
15 | body_text: Optional[str] = None,
16 | template_id: Optional[str] = None):
17 | self.sender = "weekly@ailert.tech"
18 | self.recipients = recipients if recipients else []
19 | self.subject = subject if subject else "Weekly Newsletter"
20 | self.charset = "UTF-8"
21 | self.body_text = body_text
22 | self.template_id = template_id
23 |
24 | # Initialize SendGrid client
25 | try:
26 | self.sg_client = SendGridAPIClient(api_key=api_key)
27 | except Exception as e:
28 | logging.error(f"Failed to initialize SendGrid client: {str(e)}")
29 | raise
30 |
31 | def _create_mail_object(self, recipient: str) -> Mail:
32 | """Create a Mail object for a single recipient"""
33 | from_email = self.sender
34 | to_email = recipient
35 |
36 | mail = Mail(
37 | from_email=from_email,
38 | to_emails=to_email,
39 | subject=self.subject,
40 | html_content=self.body_text
41 | )
42 |
43 | # if self.template_id:
44 | # mail.template_id = self.template_id
45 | # else:
46 | # content = Content("text/html", self.body_text)
47 | # mail.content = [content]
48 |
49 | return mail
50 |
51 | def send_email(self) -> dict:
52 | """
53 | Send emails to all recipients using SendGrid
54 | Returns:
55 | dict: Status of email sending operation
56 | """
57 | if not self.recipients:
58 | return {
59 | "status": "error",
60 | "message": "No recipients specified",
61 | "failed_recipients": []
62 | }
63 |
64 | failed_recipients = []
65 | successful_count = 0
66 |
67 | for recipient in self.recipients:
68 | try:
69 | mail = self._create_mail_object(recipient)
70 | response = self.sg_client.send(mail)
71 |
72 | if response.status_code in [200, 201, 202]:
73 | successful_count += 1
74 | logging.info(f"Email sent successfully to {recipient}")
75 | else:
76 | failed_recipients.append({
77 | "email": recipient,
78 | "error": f"SendGrid API returned status code: {response.status_code}"
79 | })
80 | logging.error(f"Failed to send email to {recipient}. Status code: {response.status_code}")
81 |
82 | except Exception as e:
83 | failed_recipients.append({
84 | "email": recipient,
85 | "error": str(e)
86 | })
87 | logging.error(f"Exception while sending email to {recipient}: {str(e)}")
88 |
89 | status = "success" if not failed_recipients else "partial_success" if successful_count else "error"
90 |
91 | return {
92 | "status": status,
93 | "message": f"Successfully sent {successful_count} out of {len(self.recipients)} emails",
94 | "failed_recipients": failed_recipients
95 | }
96 |
97 | def add_recipient(self, recipient: str) -> None:
98 | """Add a single recipient to the email list"""
99 | if recipient not in self.recipients:
100 | self.recipients.append(recipient)
101 |
102 | def add_recipients(self, recipients: List[str]) -> None:
103 | """Add multiple recipients to the email list"""
104 | for recipient in recipients:
105 | self.add_recipient(recipient)
106 |
107 | def set_template_id(self, template_id: str) -> None:
108 | """Set the SendGrid template ID"""
109 | self.template_id = template_id
110 |
111 | def set_body_text(self, body_text: str) -> None:
112 | """Set the email body text"""
113 | self.body_text = body_text
114 |
115 | def set_subject(self, subject: str) -> None:
116 | """Set the email subject"""
117 | self.subject = subject
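118 | 
119 | # Example (sketch): how app/main.py drives this service.
120 | #
121 | #     service = EmailService(recipients=["add-your-email@test.com"],
122 | #                            body_text="<h1>newsletter html</h1>")
123 | #     result = service.send_email()
124 | #     print(result["status"], result["message"])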
--------------------------------------------------------------------------------
/static/style.css:
--------------------------------------------------------------------------------
1 | /* Base Styles */
2 | body {
3 | font-family: 'Segoe UI', -apple-system, BlinkMacSystemFont, sans-serif;
4 | line-height: 1.4;
5 | margin: 0;
6 | padding: 0;
7 | background-color: #f0f2f5;
8 | color: #2d3748;
9 | }
10 |
11 | .container {
12 | max-width: 600px;
13 | margin: 0 auto;
14 | background-color: white;
15 | box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
16 | }
17 |
18 | /* Top Navigation */
19 | .top-nav {
20 | padding: 4px 0;
21 | text-align: right;
22 | margin-right: 12px;
23 | margin-bottom: 2px;
24 | }
25 |
26 | .top-nav a {
27 | color: #2c3e50;
28 | text-decoration: none;
29 | padding: 3px 8px;
30 | margin: 0 2px;
31 | font-size: 10px;
32 | border-radius: 12px;
33 | border: 1px solid #e5e7eb;
34 | }
35 |
36 | /* Header Styles */
37 | .header {
38 | background: linear-gradient(135deg, #6366f1 0%, #8b5cf6 100%);
39 | color: white;
40 | padding: 25px;
41 | border-radius: 12px;
42 | margin: 4px 12px 12px 12px;
43 | }
44 |
45 | .header-top {
46 | display: flex;
47 | align-items: center;
48 | margin-bottom: 8px;
49 | }
50 |
51 | .header-content {
52 | position: relative;
53 | z-index: 1;
54 | padding-right: 60px;
55 | }
56 |
57 | .logo {
58 | color: #ffffff;
59 |     background: rgb(255, 255, 255);
60 | width: 40px;
61 | height: 40px;
62 | border-radius: 8px;
63 | margin-right: 15px;
64 | }
65 |
66 | .brand-name {
67 | font-size: 24px;
68 | font-weight: 700;
69 | letter-spacing: -0.5px;
70 | }
71 |
72 | .header h2 {
73 | font-size: 24px;
74 | font-weight: 400;
75 | margin: 0 0 12px 0;
76 | line-height: 1.3;
77 | }
78 |
79 | .header p {
80 | font-size: 16px;
81 | line-height: 1.4;
82 | margin: 0;
83 | opacity: 0.9;
84 | }
85 |
86 | /* Content Sections */
87 | .section {
88 | margin: 12px;
89 | padding: 16px;
90 | border-radius: 8px;
91 | background: white;
92 | box-shadow: 0 1px 2px rgba(0, 0, 0, 0.04);
93 | }
94 |
95 | .summary-section {
96 | background: linear-gradient(135deg, #e0e7ff 0%, #f0f7ff 100%);
97 | }
98 |
99 | .section-title {
100 | color: #4f46e5;
101 | font-size: 18px;
102 | font-weight: 700;
103 | margin-bottom: 12px;
104 | padding-bottom: 8px;
105 | border-bottom: 1px solid #b0b8e6;
106 | }
107 |
108 | /* News Items */
109 | .news-item {
110 | padding: 12px;
111 | margin-bottom: 12px;
112 | background: white;
113 | border-radius: 6px;
114 | border: 1px solid #e5e7eb;
115 | }
116 |
117 | .news-title {
118 | color: #4338ca;
119 | font-size: 16px;
120 | font-weight: 600;
121 | margin-bottom: 6px;
122 | }
123 |
124 | .news-item p {
125 | margin: 0 0 8px 0;
126 | font-size: 14px;
127 | line-height: 1.4;
128 | }
129 |
130 | /* Trending Button */
131 | .trending-button {
132 | display: inline-flex;
133 | align-items: center;
134 | background: linear-gradient(135deg, #f0f7ff 0%, #e0e7ff 100%);
135 | padding: 4px 10px;
136 | border-radius: 12px;
137 | font-size: 12px;
138 | color: #4338ca;
139 | margin-top: 8px;
140 | }
141 |
142 | .trending-button i {
143 | margin-right: 6px;
144 | color: #6366f1;
145 | }
146 |
147 | /* Share Section */
148 | .share-section {
149 | /* background: linear-gradient(135deg, #818cf8 0%, #6366f1 100%);*/
150 | background: linear-gradient(135deg, #f0f7ff 0%, #e0e7ff 100%);
151 | color: black;
152 | text-align: center;
153 | padding: 20px 16px;
154 | }
155 |
156 | .share-button {
157 | padding: 8px 16px;
158 | font-size: 13px;
159 | border-radius: 16px;
160 | margin: 6px;
161 | }
162 |
163 | /* Feedback Section */
164 | .feedback-section {
165 | background: linear-gradient(135deg, #f0f7ff 0%, #e0e7ff 100%);
166 | text-align: center;
167 | padding: 20px 16px;
168 | }
169 |
170 | .feedback-button {
171 | padding: 8px 16px;
172 | border-radius: 16px;
173 | font-size: 13px;
174 | margin: 0 6px;
175 | }
176 |
177 | /* Read Time */
178 | .read-time {
179 | display: inline-flex;
180 | align-items: center;
181 | padding: 4px 10px;
182 | border-radius: 12px;
183 | margin-top: 8px;
184 | font-size: 12px;
185 | }
186 |
187 | /* Footer */
188 | .footer {
189 | background: linear-gradient(135deg, #4338ca 0%, #3730a3 100%);
190 | color: white;
191 | padding: 20px 16px;
192 | text-align: center;
193 | font-size: 12px;
194 | }
195 |
196 | .footer a {
197 | padding: 0 8px;
198 | }
199 |
200 | /* Responsive Design */
201 | @media (max-width: 600px) {
202 | .section {
203 | margin: 8px;
204 | padding: 12px;
205 | }
206 |
207 | .header {
208 | padding: 20px;
209 | margin: 4px 8px 8px 8px;
210 | }
211 |
212 | .header h2 {
213 | font-size: 20px;
214 | }
215 |
216 | .news-item {
217 | padding: 10px;
218 | margin-bottom: 10px;
219 | }
220 | }
--------------------------------------------------------------------------------
/db_handler/db.py:
--------------------------------------------------------------------------------
1 | """
2 | Database support functions.
3 | The idea is that none of the individual scripts deal directly with the file system.
4 | Any of the file system I/O and the associated settings are in this single file.
5 | """
6 |
7 | import os
8 | import sqlite3, zlib, pickle, tempfile
9 | from sqlitedict import SqliteDict
10 | from contextlib import contextmanager
11 |
12 |
13 | DATA_DIR = 'data'
14 |
15 | @contextmanager
16 | def _tempfile(*args, **kws):
17 | """ Context for temporary file.
18 | Will find a free temporary filename upon entering
19 | and will try to delete the file on leaving
20 | Parameters
21 | ----------
22 | suffix : string
23 | optional file suffix
24 | """
25 |
26 | fd, name = tempfile.mkstemp(*args, **kws)
27 | os.close(fd)
28 | try:
29 | yield name
30 | finally:
31 | try:
32 | os.remove(name)
33 | except OSError as e:
34 | if e.errno == 2:
35 | pass
36 | else:
37 | raise e
38 |
39 |
40 | @contextmanager
41 | def open_atomic(filepath, *args, **kwargs):
42 | """ Open temporary file object that atomically moves to destination upon
43 | exiting.
44 | Allows reading and writing to and from the same filename.
45 | Parameters
46 | ----------
47 | filepath : string
48 | the file path to be opened
49 | fsync : bool
50 | whether to force write the file to disk
51 | kwargs : mixed
52 | Any valid keyword arguments for :code:`open`
53 | """
54 | fsync = kwargs.pop('fsync', False)
55 |
56 | with _tempfile(dir=os.path.dirname(filepath)) as tmppath:
57 | with open(tmppath, *args, **kwargs) as f:
58 | yield f
59 | if fsync:
60 | f.flush()
61 | os.fsync(f.fileno())
62 | os.rename(tmppath, filepath)
63 |
64 | def safe_pickle_dump(obj, fname):
65 | """
66 | prevents a case where one process could be writing a pickle file
67 | while another process is reading it, causing a crash. the solution
68 | is to write the pickle file to a temporary file and then move it.
69 | """
70 | with open_atomic(fname, 'wb') as f:
71 | pickle.dump(obj, f, -1) # -1 specifies highest binary protocol
72 |
73 | # -----------------------------------------------------------------------------
74 |
75 | class CompressedSqliteDict(SqliteDict):
76 | """ overrides the encode/decode methods to use zlib, so we get compressed storage """
77 |
78 | def __init__(self, *args, **kwargs):
79 |
80 | def encode(obj):
81 | return sqlite3.Binary(zlib.compress(pickle.dumps(obj, pickle.HIGHEST_PROTOCOL)))
82 |
83 | def decode(obj):
84 | return pickle.loads(zlib.decompress(bytes(obj)))
85 |
86 | super().__init__(*args, **kwargs, encode=encode, decode=decode)
87 |
88 | # -----------------------------------------------------------------------------
89 | """
90 | some docs to self:
91 | flag='c': default mode, open for read/write, and creating the db/table if necessary
92 | flag='r': open for read-only
93 | """
94 |
95 | # stores info about papers, and also their lighter-weight metadata
96 | PAPERS_DB_FILE = os.path.join(DATA_DIR, 'papers.db')
97 | # stores account-relevant info, like which tags exist for which papers
98 | DICT_DB_FILE = os.path.join(DATA_DIR, 'dict.db')
99 |
100 | def get_papers_db(flag='r', autocommit=True):
101 | assert flag in ['r', 'c']
102 | pdb = CompressedSqliteDict(PAPERS_DB_FILE, tablename='papers', flag=flag, autocommit=autocommit)
103 | return pdb
104 |
105 | def get_metas_db(flag='r', autocommit=True):
106 | assert flag in ['r', 'c']
107 | mdb = SqliteDict(PAPERS_DB_FILE, tablename='metas', flag=flag, autocommit=autocommit)
108 | return mdb
109 |
110 | def get_tags_db(flag='r', autocommit=True):
111 | assert flag in ['r', 'c']
112 | tdb = CompressedSqliteDict(DICT_DB_FILE, tablename='tags', flag=flag, autocommit=autocommit)
113 | return tdb
114 |
115 | def get_last_active_db(flag='r', autocommit=True):
116 | assert flag in ['r', 'c']
117 | ladb = SqliteDict(DICT_DB_FILE, tablename='last_active', flag=flag, autocommit=autocommit)
118 | return ladb
119 |
120 | def get_email_db(flag='r', autocommit=True):
121 | assert flag in ['r', 'c']
122 | edb = SqliteDict(DICT_DB_FILE, tablename='email', flag=flag, autocommit=autocommit)
123 | return edb
124 |
125 | # -----------------------------------------------------------------------------
126 | """
127 | our "feature store" is currently just a pickle file, may want to consider hdf5 in the future
128 | """
129 |
130 | # stores tfidf features and a bunch of other metadata
131 | FEATURES_FILE = os.path.join(DATA_DIR, 'features.p')
132 |
133 | def save_features(features):
134 | """ takes the features dict and save it to disk in a simple pickle file """
135 | safe_pickle_dump(features, FEATURES_FILE)
136 |
137 | def load_features():
138 | """ loads the features dict from disk """
139 | with open(FEATURES_FILE, 'rb') as f:
140 | features = pickle.load(f)
141 | return features
142 |
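143 | # Example (sketch) of the helpers above; 'paper-id' is an illustrative key:
144 | #
145 | #     pdb = get_papers_db(flag='c')           # 'c' creates the db/table if needed
146 | #     pdb['paper-id'] = {'title': 'A Paper'}  # stored compressed via zlib+pickle
147 | #     print(get_papers_db(flag='r')['paper-id'])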
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # AiLert 
2 |
3 | An open-source AI newsletter platform that aggregates and curates AI content from across the internet.
4 |
5 | ## Overview
6 | AiLert automatically aggregates content from 150+ sources including research papers, news sites, GitHub repositories, and events to create customizable AI newsletters. Built with Python and powered by AWS, it helps communities and teams stay updated with the latest in AI.
7 |
8 | ## Features
9 | - 📚 Multi-source aggregation (150+ sources)
10 | - 🎯 Smart content categorization
11 | - 📊 Engagement tracking
12 | - ⚡ Async content processing
13 | - 📧 Customizable newsletter templates
14 | - 📅 Daily and weekly digest options
15 |
16 | ## Content Sources
17 | - Research Papers (arXiv)
18 | - Industry News (RSS feeds)
19 | - GitHub Trending Repositories
20 | - AI Competitions & Events
21 | - Product Launches
22 | - Technical Blogs
23 |
24 | ## Tech Stack
25 | - Python 3.10+ (the pydantic models use `X | None` union syntax)
26 | - Flask
27 | - AWS DynamoDB
28 | - BeautifulSoup4
29 | - Feedparser
30 | - Schedule
31 | - Pydantic
32 | - uvicorn
33 |
34 | ## 📫 How to Subscribe
35 |
36 | 1. Visit https://ailert.tech
37 | 2. Navigate to the newsletter section
38 | 3. Enter your email address
39 | 4. Confirm your subscription
40 |
41 | ## ✨ What Our Readers Say
42 |
43 | `"AIlert's newsletter helps me stay on top of AI developments without getting overwhelmed" - Tech Lead at Fortune 500`
44 |
45 |
46 | `"The perfect blend of technical depth and practical insights" - AI Researcher`
47 |
48 | ## 🔒 Your Privacy Matters
49 |
50 | - No spam, ever
51 | - Unsubscribe anytime
52 | - Your data is never shared or sold
53 |
54 | ## 📅 Publication Schedule
55 | Receive our carefully curated insights every week, delivered straight to your inbox.
56 |
57 | ## Installation
58 |
59 | 1. Clone the repository:
60 | ```bash
61 | git clone https://github.com/yourusername/ailert.git
62 | cd ailert
63 | ```
64 |
65 | 2. Install dependencies:
66 | ```bash
67 | pip install -r requirements.txt
68 | ```
69 |
70 | 3. Set up AWS credentials:
71 | ```bash
72 | export AWS_ACCESS_KEY_ID="your_access_key"
73 | export AWS_SECRET_ACCESS_KEY="your_secret_key"
74 | export AWS_REGION="your_region"
75 | ```
76 |
77 | 4. Run the application:
78 | ```bash
79 | python launch.py
80 | ```
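81 | 
82 | Alternatively, build and run with Docker (a sketch based on the included Dockerfile, which exposes port 5000 and reads credentials from a mounted vault directory; the image tag is up to you):
83 | ```bash
84 | docker build -t ailert .
85 | docker run -p 5000:5000 -v "$(pwd)/db_handler/vault:/app/db_handler/vault" ailert
86 | ```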
81 |
82 | ## Project Structure
83 | ```
84 | ailert/
85 | ├── builder/ # Newsletter generation
86 | ├── db_handler/ # Db operations manager
87 | ├── app/ # Core functions of the application
88 | ├── router/ # REST Api routes
89 | ├── services/ # Content aggregation services
90 | ├── static/ # Templates and assets
91 | ├── utils/ # Application common utilities
92 | ├── launch.py        # Flask application entry point
93 | └── requirements.txt # Dependencies
94 | ```
95 |
96 | ## Contributing
97 | We welcome contributions of all kinds! Here are some ways you can help:
98 |
99 | ### Development
100 | - Add new content sources
101 | - Improve content categorization
102 | - Optimize performance
103 | - Add new features
104 | - Fix bugs
105 | - Write tests
106 |
107 | ### Documentation
108 | - Improve technical docs
109 | - Write tutorials
110 | - Add code comments
111 | - Create examples
112 |
113 | ### Design
114 | - Improve newsletter templates
115 | - Create visual assets
116 | - Enhance UI/UX
117 |
118 | ### Content
119 | - Add new RSS feeds
120 | - Improve content filtering
121 | - Suggest new features
122 |
123 | ## Getting Started with Contributing
124 |
125 | 1. Fork the repository
126 | 2. Create a new branch
127 | ```bash
128 | git checkout -b feature/your-feature
129 | ```
130 | 3. Make your changes
131 | 4. Write or update tests
132 | 5. Submit a pull request
133 |
134 | ### Development Setup
135 | 1. Install development dependencies:
136 | ```bash
137 | pip install -r requirements-dev.txt
138 | ```
139 |
140 | 2. Run tests:
141 | ```bash
142 | python -m pytest
143 | ```
144 |
145 | ## API Documentation
146 |
147 | ### Newsletter Builder
148 | ```python
149 | from builder.builder import NewsletterBuilder
150 |
151 | # Create daily newsletter
152 | daily = NewsletterBuilder({
153 | "gh_url": "github_url",
154 | "gh_ftype": "daily"
155 | })
156 | daily.set_sections(["news"])
157 | content = await daily.section_generator()
158 | ```
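159 | 
160 | To render the final HTML, pass the generated sections to `build()` (as `app/main.py` does; note that in this repo the builder also takes a DynamoDB handle as a second constructor argument):
161 | ```python
162 | newsletter_html = await daily.build(content)
163 | ```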
159 |
160 | ### Content Services
161 | Each service handles different content types:
162 | - `NewsService`: Industry news
163 | - `ResearchService`: Research papers
164 | - `GitHubScanner`: Trending repositories
165 | - `ProductService`: New AI products
166 | - `CompetitionService`: AI competitions
167 | - `EventsService`: Upcoming events
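168 | 
169 | A minimal usage sketch (run inside an event loop; `CompetitionService()` takes no arguments and its async getter comes from `services/competition_service.py`):
170 | ```python
171 | import asyncio
172 | from services import CompetitionService
173 | 
174 | async def preview():
175 |     for comp in await CompetitionService().get_latest_competitions():
176 |         print(comp.name, comp.deadline, comp.reward)
177 | 
178 | asyncio.run(preview())
179 | ```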
168 |
169 | ## License
170 | This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
171 |
172 | ## Acknowledgments
173 | - All our amazing contributors
174 | - The open-source community
175 | - RSS feed providers
176 | - Content creators
177 |
178 | ## Contact
179 | - Create an issue for bug reports
180 | - Start a discussion for feature requests
181 | - Join our Discord community [link]
182 |
183 | ## Roadmap
184 | - [ ] Add more content sources
185 | - [ ] Implement ML-based content ranking
186 | - [ ] Add personalization options
187 | - [ ] Create API endpoints
188 | - [ ] Add email delivery system
189 | - [ ] Improve template customization
190 |
191 | ---
192 | Built with ❤️ for the AI community
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Contributor Covenant Code of Conduct
2 |
3 | ## Our Pledge
4 |
5 | We as members, contributors, and leaders pledge to make participation in our
6 | community a harassment-free experience for everyone, regardless of age, body
7 | size, visible or invisible disability, ethnicity, sex characteristics, gender
8 | identity and expression, level of experience, education, socio-economic status,
9 | nationality, personal appearance, race, religion, or sexual identity
10 | and orientation.
11 |
12 | We pledge to act and interact in ways that contribute to an open, welcoming,
13 | diverse, inclusive, and healthy community.
14 |
15 | ## Our Standards
16 |
17 | Examples of behavior that contributes to a positive environment for our
18 | community include:
19 |
20 | * Demonstrating empathy and kindness toward other people
21 | * Being respectful of differing opinions, viewpoints, and experiences
22 | * Giving and gracefully accepting constructive feedback
23 | * Accepting responsibility and apologizing to those affected by our mistakes,
24 | and learning from the experience
25 | * Focusing on what is best not just for us as individuals, but for the
26 | overall community
27 |
28 | Examples of unacceptable behavior include:
29 |
30 | * The use of sexualized language or imagery, and sexual attention or
31 | advances of any kind
32 | * Trolling, insulting or derogatory comments, and personal or political attacks
33 | * Public or private harassment
34 | * Publishing others' private information, such as a physical or email
35 | address, without their explicit permission
36 | * Other conduct which could reasonably be considered inappropriate in a
37 | professional setting
38 |
39 | ## Enforcement Responsibilities
40 |
41 | Community leaders are responsible for clarifying and enforcing our standards of
42 | acceptable behavior and will take appropriate and fair corrective action in
43 | response to any behavior that they deem inappropriate, threatening, offensive,
44 | or harmful.
45 |
46 | Community leaders have the right and responsibility to remove, edit, or reject
47 | comments, commits, code, wiki edits, issues, and other contributions that are
48 | not aligned to this Code of Conduct, and will communicate reasons for moderation
49 | decisions when appropriate.
50 |
51 | ## Scope
52 |
53 | This Code of Conduct applies within all community spaces, and also applies when
54 | an individual is officially representing the community in public spaces.
55 | Examples of representing our community include using an official e-mail address,
56 | posting via an official social media account, or acting as an appointed
57 | representative at an online or offline event.
58 |
59 | ## Enforcement
60 |
61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
62 | reported to the community leaders responsible for enforcement.
63 | All complaints will be reviewed and investigated promptly and fairly.
64 |
65 | All community leaders are obligated to respect the privacy and security of the
66 | reporter of any incident.
67 |
68 | ## Enforcement Guidelines
69 |
70 | Community leaders will follow these Community Impact Guidelines in determining
71 | the consequences for any action they deem in violation of this Code of Conduct:
72 |
73 | ### 1. Correction
74 |
75 | **Community Impact**: Use of inappropriate language or other behavior deemed
76 | unprofessional or unwelcome in the community.
77 |
78 | **Consequence**: A private, written warning from community leaders, providing
79 | clarity around the nature of the violation and an explanation of why the
80 | behavior was inappropriate. A public apology may be requested.
81 |
82 | ### 2. Warning
83 |
84 | **Community Impact**: A violation through a single incident or series
85 | of actions.
86 |
87 | **Consequence**: A warning with consequences for continued behavior. No
88 | interaction with the people involved, including unsolicited interaction with
89 | those enforcing the Code of Conduct, for a specified period of time. This
90 | includes avoiding interactions in community spaces as well as external channels
91 | like social media. Violating these terms may lead to a temporary or
92 | permanent ban.
93 |
94 | ### 3. Temporary Ban
95 |
96 | **Community Impact**: A serious violation of community standards, including
97 | sustained inappropriate behavior.
98 |
99 | **Consequence**: A temporary ban from any sort of interaction or public
100 | communication with the community for a specified period of time. No public or
101 | private interaction with the people involved, including unsolicited interaction
102 | with those enforcing the Code of Conduct, is allowed during this period.
103 | Violating these terms may lead to a permanent ban.
104 |
105 | ### 4. Permanent Ban
106 |
107 | **Community Impact**: Demonstrating a pattern of violation of community
108 | standards, including sustained inappropriate behavior, harassment of an
109 | individual, or aggression toward or disparagement of classes of individuals.
110 |
111 | **Consequence**: A permanent ban from any sort of public interaction within
112 | the community.
113 |
114 | ## Attribution
115 |
116 | This Code of Conduct is adapted from the [Contributor Covenant][homepage],
117 | version 2.0, available at
118 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.
119 |
120 | Community Impact Guidelines were inspired by [Mozilla's code of conduct
121 | enforcement ladder](https://github.com/mozilla/diversity).
122 |
123 | [homepage]: https://www.contributor-covenant.org
124 |
125 | For answers to common questions about this code of conduct, see the FAQ at
126 | https://www.contributor-covenant.org/faq. Translations are available at
127 | https://www.contributor-covenant.org/translations.
128 |
--------------------------------------------------------------------------------
/utils/utility.py:
--------------------------------------------------------------------------------
1 | import os
2 | import re
3 | import csv
4 | import hashlib
5 | import logging
6 | from pathlib import Path
7 | from datetime import datetime
8 | from typing import Any, Dict, List, Optional
9 |
10 |
11 | def load_template(template_path="static/newsletter.html") -> str:
12 | with open(template_path, 'r') as f:
13 | return f.read()
14 |
15 | def generate_deterministic_id(item: Dict[str, Any], key_fields: List[str], prefix: str = "item") -> str:
16 | """
17 | Example:
18 | item = {
19 | "product_name": "Widget",
20 | "color": "blue",
21 | "timestamp": "2024-01-01"
22 | }
23 | id = generate_deterministic_id(
24 | item,
25 | key_fields=["product_name", "color"],
26 | prefix="prod"
27 | )
28 | # Result: prod-a1b2c3d4...
29 | """
30 |     sorted_fields = sorted(key_fields)  # sort a copy; don't mutate the caller's list
31 |     values = []
32 |     for field in sorted_fields:
33 | if field not in item:
34 | raise KeyError(f"Required field '{field}' not found in item")
35 | value = item[field]
36 | values.append(str(value))
37 |
38 | combined_string = "||".join(values)
39 | hash_object = hashlib.sha256(combined_string.encode())
40 | hash_hex = hash_object.hexdigest()
41 | short_hash = hash_hex[:12]
42 | return f"{prefix}-{short_hash}"
43 |
44 | def truncate_text(text: str, max_length: int = 200) -> str:
45 | """Truncate text to specified length at the nearest word boundary."""
46 | if len(text) <= max_length:
47 | return text
48 | truncated = text[:max_length].rsplit(' ', 1)[0]
49 | return truncated.rstrip('.,!?:;')
50 |
51 | def get_formatted_timestamp():
52 | """Get current timestamp in YYYY-MM-DD format"""
53 | return datetime.now().strftime("%Y-%m-%d")
54 |
55 |
56 | def is_valid_email(email):
57 | """Validate email format"""
58 | pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'
59 | return re.match(pattern, email) is not None
60 |
61 |
62 | def save_to_csv(email):
63 | csv_file = 'db_handler/vault/recipients.csv'
64 | file_exists = os.path.exists(csv_file)
65 |
66 | try:
67 | with open(csv_file, 'a', newline='') as file:
68 | writer = csv.writer(file)
69 | if not file_exists:
70 | writer.writerow(['email', 'subscribed_at'])
71 | writer.writerow([email, get_formatted_timestamp()])
72 | return True
73 | except Exception as e:
74 | logging.error(f"Error saving to CSV: {str(e)}")
75 | return False
76 |
77 |
78 | def is_email_subscribed(email):
79 | """Check if email already exists in CSV"""
80 | csv_file = 'db_handler/vault/recipients.csv'
81 | if not os.path.exists(csv_file):
82 | return False
83 |
84 | try:
85 | with open(csv_file, 'r') as file:
86 | reader = csv.reader(file)
87 |             next(reader, None)  # skip header; None guards an empty file
88 |             return any(row and row[0] == email for row in reader)
89 | except Exception as e:
90 | logging.error(f"Error checking subscription: {str(e)}")
91 | return False
92 |
93 |
94 | def inline_css(html_content: str, css_path: Optional[str] = None) -> str:
95 | """Replace CSS link tags with the actual CSS content in the HTML string."""
96 |     css_link_pattern = r'<link[^>]+rel="stylesheet"[^>]+href="([^"]+)"[^>]*>'
97 |
98 | def replace_css_link(match):
99 | css_file = match.group(1)
100 |
101 | # If css_path is provided, use it, otherwise look in current directory
102 | if css_path:
103 | css_file_path = Path(css_path) / Path(css_file).name
104 | else:
105 | css_file_path = Path(css_file)
106 |
107 | try:
108 | with open(css_file_path, 'r', encoding='utf-8') as f:
109 | css_content = f.read()
110 |             return f'<style>\n{css_content}\n</style>'
111 | except FileNotFoundError:
112 |             logging.warning(f"CSS file not found: {css_file_path}")
113 | return match.group(0) # Keep original link tag if file not found
114 | except Exception as e:
115 |             logging.error(f"Error reading CSS file: {e}")
116 | return match.group(0)
117 |
118 | # Replace all CSS link tags with style tags
119 | return re.sub(css_link_pattern, replace_css_link, html_content)
120 |
121 |
122 | def inline_svg_images(html_content: str, svg_path: Optional[str] = None) -> str:
123 | """Replace SVG image tags with the actual SVG content in the HTML string."""
124 |     img_pattern = r'<img[^>]+src="([^"]+\.svg)"[^>]*>'
125 |
126 | def replace_img_tag(match):
127 | # Get the full img tag and the src value
128 | img_tag = match.group(0)
129 | svg_file = match.group(1)
130 |
131 | # Extract the class and alt attributes if they exist
132 | class_match = re.search(r'class="([^"]+)"', img_tag)
133 | alt_match = re.search(r'alt="([^"]+)"', img_tag)
134 |
135 | class_attr = f' class="{class_match.group(1)}"' if class_match else ''
136 | alt_attr = f' aria-label="{alt_match.group(1)}"' if alt_match else ''
137 |
138 | # If svg_path is provided, use it, otherwise look in current directory
139 | if svg_path:
140 | svg_file_path = Path(svg_path) / Path(svg_file).name
141 | else:
142 | svg_file_path = Path(svg_file)
143 |
144 | try:
145 | with open(svg_file_path, 'r', encoding='utf-8') as f:
146 | svg_content = f.read()
147 |             svg_content = svg_content.replace('<svg', f'<svg{class_attr}{alt_attr}', 1)  # inject preserved attrs into the root tag