├── db_handler ├── sample_vault │ ├── recipients.csv │ ├── secrets.ini │ └── links.py ├── __init__.py ├── models.py ├── db.py └── dynamo.py ├── .gitignore ├── services ├── crawler │ ├── blog_crawler.py │ ├── social_media_crawler.py │ ├── __init__.py │ └── rss_crawler.py ├── apps │ ├── or_service.py │ ├── __init__.py │ ├── kg_service.py │ ├── hf_service.py │ ├── gh_service.py │ ├── ph_service.py │ └── arx_service.py ├── __init__.py ├── competition_service.py ├── product_service.py ├── research_service.py ├── email_service.py ├── news_service.py └── event_service.py ├── .dockerignore ├── launch.py ├── requirements.txt ├── static ├── logo.svg ├── favicon.svg ├── newsletter.html └── style.css ├── LICENSE ├── Dockerfile ├── utils ├── auth_utility.py └── utility.py ├── .github └── workflows │ └── deploy.yml ├── app └── main.py ├── README.md ├── CODE_OF_CONDUCT.md └── router └── routes.py /db_handler/sample_vault/recipients.csv: -------------------------------------------------------------------------------- 1 | email 2 | add-your-email@test.com -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /test.py 2 | /static/generated_newsletter.html 3 | /db_handler/vault/ 4 | /sync-vault*.sh 5 | -------------------------------------------------------------------------------- /services/crawler/blog_crawler.py: -------------------------------------------------------------------------------- 1 | class SubstackCrawler: 2 | pass 3 | 4 | class MediumCrawler: 5 | pass -------------------------------------------------------------------------------- /services/crawler/social_media_crawler.py: -------------------------------------------------------------------------------- 1 | class TwitterCrawler: 2 | pass 3 | 4 | class LinkedinCrawler: 5 | pass -------------------------------------------------------------------------------- /db_handler/__init__.py: -------------------------------------------------------------------------------- 1 | from db_handler.db import * 2 | from db_handler.models import * 3 | from db_handler.dynamo import Dynamo 4 | from db_handler.vault.links import rss_feed, sites -------------------------------------------------------------------------------- /services/apps/or_service.py: -------------------------------------------------------------------------------- 1 | class OpenReviewScanner: 2 | def __init__(self, top_n: int = 5): 3 | self.top_n = top_n 4 | 5 | def get_top_n_papers(self): 6 | pass -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | *.pyc 3 | *.pyo 4 | *.pyd 5 | .Python 6 | env/ 7 | venv/ 8 | .env 9 | *.log 10 | .git 11 | .gitignore 12 | .pytest_cache/ 13 | .coverage 14 | htmlcov/ 15 | .DS_Store 16 | test.py 17 | *.sqlite -------------------------------------------------------------------------------- /launch.py: -------------------------------------------------------------------------------- 1 | from flask import Flask 2 | from router.routes import bp, limiter 3 | import os 4 | 5 | app = Flask(__name__) 6 | 7 | limiter.init_app(app) 8 | app.register_blueprint(bp) 9 | 10 | 11 | if __name__ == "__main__": 12 | port = int(os.environ.get("PORT", 5000)) 13 | app.run(host="0.0.0.0", port=port, debug=True) 14 | -------------------------------------------------------------------------------- /services/crawler/__init__.py: 
-------------------------------------------------------------------------------- 1 | from services.crawler.rss_crawler import * 2 | from services.crawler.blog_crawler import SubstackCrawler, MediumCrawler 3 | from services.crawler.social_media_crawler import LinkedinCrawler, TwitterCrawler 4 | 5 | __all__ = [ 6 | "SubstackCrawler", 7 | "MediumCrawler", 8 | "LinkedinCrawler", 9 | "TwitterCrawler" 10 | ] -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | substack-api==1.1.1 2 | kaggle==1.7.4.5 3 | pydantic==2.11.9 4 | pandas 5 | requests==2.32.5 6 | simplejson==3.20.2 7 | botocore==1.40.44 8 | schedule==1.2.2 9 | PyJWT 10 | cryptography==46.0.2 11 | beautifulsoup4==4.14.2 12 | numpy 13 | scikit-learn 14 | feedparser==6.0.12 15 | pytz==2025.2 16 | Flask 17 | Flask-Cors==6.0.1 18 | Flask-Limiter 19 | uvicorn 20 | sqlitedict==2.1.0 21 | sendgrid==7.0.0rc2 22 | boto3==1.40.44 23 | -------------------------------------------------------------------------------- /services/apps/__init__.py: -------------------------------------------------------------------------------- 1 | from services.apps.arx_service import ArxivScanner 2 | from services.apps.gh_service import GitHubScanner 3 | from services.apps.hf_service import HuggingFaceScanner 4 | from services.apps.kg_service import KaggleScanner 5 | from services.apps.or_service import OpenReviewScanner 6 | from services.apps.ph_service import ProductHuntScanner 7 | 8 | __all__ = [ 9 | "ArxivScanner", 10 | "GitHubScanner", 11 | "HuggingFaceScanner", 12 | "KaggleScanner", 13 | "OpenReviewScanner", 14 | "ProductHuntScanner" 15 | ] -------------------------------------------------------------------------------- /db_handler/sample_vault/secrets.ini: -------------------------------------------------------------------------------- 1 | [default] 2 | brand_name = "AiLert" 3 | 4 | [HuggingFace] 5 | # token = add github token and uncomment this line 6 | 7 | [Kaggle] 8 | # path = add kaggle credential file path here and uncomment 9 | 10 | 11 | [Dynamo] 12 | # region = us-east-1 13 | 14 | [Arxiv] 15 | # q = cat:cs.CV+OR+cat:cs.LG+OR+cat:cs.CL+OR+cat:cs.AI+OR+cat:cs.NE+OR+cat:cs.RO 16 | 17 | [Sendgrid] 18 | # api_key = add sendgrid api key and uncomment 19 | 20 | [JWT] 21 | # user_id = test 22 | # token = generate a random token that your apis will accept -------------------------------------------------------------------------------- /static/logo.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /services/__init__.py: -------------------------------------------------------------------------------- 1 | from services.news_service import NewsService 2 | from services.event_service import EventsService 3 | from services.research_service import ResearchService 4 | from services.apps.gh_service import GitHubScanner 5 | from services.competition_service import CompetitionService 6 | from services.product_service import ProductService 7 | from services.email_service import EmailService 8 | 9 | 10 | __all__ = [ 11 | "NewsService", 12 | "GitHubScanner", 13 | "CompetitionService", 14 | "EventsService", 15 | "ResearchService", 16 | "ProductService", 17 | "EmailService" 18 | ] -------------------------------------------------------------------------------- 
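The sample_vault/secrets.ini above ships with every integration key commented out; the services further down read the live copy from db_handler/vault/secrets.ini with configparser. Below is a minimal sketch of that pattern with fallbacks so missing optional keys fail softly — the fallback handling is illustrative, the repo's own modules index the sections directly.
```python
# Illustrative sketch: reading the vault configuration the same way the
# service modules below do, but with fallbacks for keys that are still
# commented out in the sample file.
import configparser

config = configparser.ConfigParser()
config.read("db_handler/vault/secrets.ini")

brand_name = config.get("default", "brand_name", fallback="AiLert")
hf_token = config.get("HuggingFace", "token", fallback=None)
kaggle_cred_path = config.get("Kaggle", "path", fallback=None)

if hf_token is None:
    print("No HuggingFace token configured; HuggingFace scanning is unavailable.")
```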
/services/competition_service.py: -------------------------------------------------------------------------------- 1 | from db_handler import Competitions 2 | from services.apps import KaggleScanner 3 | 4 | class CompetitionService: 5 | def __init__(self): 6 | self.kaggle = KaggleScanner() 7 | self.competitions = [] 8 | 9 | async def get_latest_competitions(self): 10 | kaggle = self.kaggle.get_new_competitions_launch() 11 | self.competitions.extend([Competitions( 12 | name = comp["name"], 13 | link = comp["link"], 14 | deadline = comp["deadline"], 15 | reward = comp["reward"] 16 | ) for comp in kaggle]) 17 | 18 | return self.competitions -------------------------------------------------------------------------------- /services/product_service.py: -------------------------------------------------------------------------------- 1 | from db_handler import Products, sites 2 | from services.apps import HuggingFaceScanner, ProductHuntScanner 3 | 4 | class ProductService: 5 | def __init__(self): 6 | self.hf_scanner = HuggingFaceScanner(sites["hf_base_url"],1) 7 | self.ph_scanner = ProductHuntScanner(sites["ph_site_url"], sites["ph_url"],1) 8 | self.products = [] 9 | 10 | async def get_latest_products(self): 11 | hf_products = self.hf_scanner.weekly_scanner() 12 | ph_products = None #self.ph_scanner.get_last_week_top_products() 13 | final_dict = hf_products #+ ph_products 14 | for key, items in final_dict.items(): 15 | for item in items: 16 | self.products.append(Products( 17 | name = item["title"], 18 | link = item["link"], 19 | summary = item["summary"], 20 | source = item["source"], 21 | engagement = item["engagement"] 22 | )) 23 | return self.products 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Anuj Gupta 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Use Python 3.11 slim image as base 2 | FROM python:3.13-slim 3 | 4 | # Set working directory 5 | WORKDIR /app 6 | 7 | # Set environment variables 8 | ENV PYTHONDONTWRITEBYTECODE=1 \ 9 | PYTHONUNBUFFERED=1 \ 10 | FLASK_APP=launch.py \ 11 | FLASK_ENV=production 12 | 13 | # Install system dependencies 14 | RUN apt-get update && apt-get install -y --no-install-recommends \ 15 | build-essential \ 16 | libpq-dev \ 17 | && rm -rf /var/lib/apt/lists/* 18 | 19 | # Create non-root user for security 20 | RUN adduser --disabled-password --gecos '' appuser 21 | 22 | # Copy requirements first to leverage Docker cache 23 | COPY requirements.txt . 24 | 25 | # Install Python dependencies 26 | RUN pip install --no-cache-dir -r requirements.txt 27 | 28 | # Copy the rest of the application 29 | COPY . . 30 | 31 | # Create vault directory 32 | RUN mkdir -p /app/db_handler/vault && \ 33 | chown -R appuser:appuser /app 34 | 35 | # Switch to non-root user 36 | USER appuser 37 | 38 | # Create volume for vault 39 | VOLUME ["/app/db_handler/vault"] 40 | 41 | # Expose port 42 | EXPOSE 5000 43 | 44 | # Command to run the application 45 | CMD ["python", "launch.py"] -------------------------------------------------------------------------------- /static/favicon.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 10 | 11 | 12 | 13 | 20 | 21 | 22 | 28 | 34 | 35 | 36 | 37 | 45 | 51 | 52 | -------------------------------------------------------------------------------- /utils/auth_utility.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import jwt 4 | import configparser 5 | from functools import wraps 6 | from flask import request, jsonify 7 | from datetime import datetime, timedelta 8 | 9 | config = configparser.ConfigParser() 10 | config.read('db_handler/vault/secrets.ini') 11 | JWT_SECRET_KEY = config["JWT"]["token"] 12 | 13 | 14 | def create_token(user_id): 15 | payload = { 16 | 'exp': datetime.now() + timedelta(days=1), 17 | 'sub': user_id 18 | } 19 | 20 | token = jwt.encode( 21 | payload, 22 | JWT_SECRET_KEY, 23 | algorithm='HS256' 24 | ) 25 | return token 26 | 27 | 28 | def token_required(f): 29 | @wraps(f) 30 | def decorated(*args, **kwargs): 31 | token = None 32 | 33 | # Check for token in headers 34 | if 'Authorization' in request.headers: 35 | token = request.headers['Authorization'].split(" ")[1] 36 | 37 | if not token: 38 | return jsonify({ 39 | 'message': 'Token is missing', 40 | 'status': 'error' 41 | }), 401 42 | 43 | try: 44 | # Decode token 45 | data = jwt.decode(token, JWT_SECRET_KEY, algorithms=["HS256"]) 46 | current_user = data['sub'] 47 | except Exception as e: 48 | logging.info("Token error" + str(e)) 49 | return jsonify({ 50 | 'message': 'Token is invalid', 51 | 'status': 'error' 52 | }), 401 53 | 54 | return f(*args, **kwargs) 55 | 56 | return decorated 57 | -------------------------------------------------------------------------------- /db_handler/models.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | from pydantic import BaseModel 3 | from typing import List, Optional 4 | 5 | class TaskType(Enum): 6 | DAILY = "daily" 7 | WEEKLY = "weekly" 8 | 9 | class SchedulerState(Enum): 10 | RUNNING = "running" 11 | PAUSED = "paused" 12 | STOPPED 
= "stopped" 13 | 14 | class NewsItem(BaseModel): 15 | title: str 16 | description: str 17 | link: str 18 | read_time: int 19 | source: Optional[str] = None 20 | engagement: Optional[str] = None 21 | additional_info: Optional[dict] = None 22 | 23 | class Competitions(BaseModel): 24 | name: str 25 | link: str 26 | deadline: str 27 | reward: str 28 | 29 | class Repo(BaseModel): 30 | name: str 31 | link: str 32 | summary: str 33 | source: Optional[str] = None 34 | engagement: Optional[str] = None 35 | 36 | class Products(BaseModel): 37 | name: str 38 | link: str 39 | summary: str 40 | source: Optional[str] = None 41 | engagement: Optional[str] = None 42 | 43 | class Event(BaseModel): 44 | title: str 45 | date: str 46 | location: str 47 | description: str 48 | 49 | class ResearchPaper(BaseModel): 50 | title: str 51 | authors: List[str] 52 | abstract: str 53 | publication: str 54 | link: str 55 | date: str 56 | engagement: Optional[str] = None 57 | 58 | class NewsletterContent(BaseModel): 59 | # model_config = dict(arbitrary_types_allowed=True) 60 | highlights: List[dict] | None = None 61 | breaking_news: List[NewsItem] | None = None 62 | research_papers: List[ResearchPaper] | None = None 63 | latest_competitions: List[Competitions] | None = None 64 | top_products: List[Products] | None = None 65 | github_trending: List[Repo] | None = None 66 | upcoming_events: List[Event] | None = None -------------------------------------------------------------------------------- /services/apps/kg_service.py: -------------------------------------------------------------------------------- 1 | import configparser 2 | import os 3 | import subprocess 4 | 5 | config = configparser.ConfigParser() 6 | config.read('db_handler/vault/secrets.ini') 7 | 8 | default_cred = config["Kaggle"]["path"] 9 | 10 | class KaggleScanner: 11 | def __init__(self, base_url: str = "", top_n=5, kaggle_cred_path=default_cred): 12 | self.base_url = base_url 13 | self.top_n = top_n 14 | self.kaggle_cred_path = kaggle_cred_path 15 | self.response = [] 16 | 17 | def _get_top_n_kaggle_competitions(self): 18 | try: 19 | os.environ["KAGGLE_CONFIG_DIR"] = os.path.expanduser(self.kaggle_cred_path) 20 | result = subprocess.run( 21 | ["kaggle", "competitions", "list", "--sort-by", "prize"], 22 | stdout=subprocess.PIPE, 23 | text=True 24 | ) 25 | if result.returncode != 0: 26 | print("Error fetching Kaggle competitions:", result.stderr) 27 | return 28 | 29 | lines = result.stdout.strip().split("\n") 30 | data_rows = [line for line in lines if "https://www.kaggle.com" in line] 31 | response = [] 32 | 33 | for row in data_rows[:self.top_n]: 34 | columns = row.split() 35 | if len(columns) > 0: 36 | competition_link = columns[0] 37 | deadline = columns[1] 38 | reward = columns[4] 39 | 40 | competition_name = competition_link.split("/")[-1] 41 | 42 | response.append({ 43 | "name": competition_name, 44 | "link": competition_link, 45 | "deadline": deadline, 46 | "reward": reward 47 | }) 48 | return response 49 | except Exception as e: 50 | print(f"Error: {e}") 51 | 52 | def get_new_competitions_launch(self): 53 | self.response = self._get_top_n_kaggle_competitions() 54 | return self.response 55 | -------------------------------------------------------------------------------- /services/apps/hf_service.py: -------------------------------------------------------------------------------- 1 | import configparser 2 | 3 | import requests 4 | 5 | config = configparser.ConfigParser() 6 | config.read('db_handler/vault/secrets.ini') 7 | 8 | default_token = 
config["HuggingFace"]["token"] 9 | 10 | class HuggingFaceScanner: 11 | def __init__(self, base_url, top_n=5, auth_token=default_token): 12 | self.base_url = base_url 13 | self.top_n = top_n 14 | self.auth_token = "Bearer "+auth_token 15 | self.response = {} 16 | 17 | def _top_models(self, top_n): 18 | url = self.base_url+"/api/models" 19 | response = requests.get( 20 | url, params={"limit": top_n, "full": "True", "config": "False"}, 21 | headers={"Authorization":self.auth_token} 22 | ) 23 | return [{"title":model["modelId"], 24 | "link":self.base_url+model["id"], 25 | "summary": model["author"], 26 | "source":"HuggingFace", 27 | "engagement": str(model["trendingScore"])}for model in response.json()] 28 | 29 | def _top_datasets(self, top_n): 30 | url = self.base_url+"/api/datasets" 31 | response = requests.get( 32 | url, params={"limit": top_n, "full": "False"}, 33 | headers={"Authorization":self.auth_token} 34 | ) 35 | return [{"title": dataset["id"], 36 | "link": self.base_url + dataset["id"], 37 | "summary": dataset["author"], 38 | "source": "HuggingFace", 39 | "engagement": str(dataset["trendingScore"])} for dataset in response.json()] 40 | 41 | def _top_apps(self, top_n): 42 | url = self.base_url+"/api/spaces" 43 | response = requests.get( 44 | url, params={"limit": top_n, "full": "True"}, 45 | headers={"Authorization":self.auth_token} 46 | ) 47 | return [{"title": apps["id"], 48 | "link": self.base_url + apps["id"], 49 | "summary": apps["author"], 50 | "source": "HuggingFace", 51 | "engagement": str(apps["trendingScore"])} for apps in response.json()] 52 | 53 | def weekly_scanner(self): 54 | self.response["top_models"] = self._top_models(self.top_n) 55 | self.response["top_datasets"] = self._top_datasets(self.top_n) 56 | self.response["top_apps"] = self._top_apps(self.top_n) 57 | return self.response -------------------------------------------------------------------------------- /services/research_service.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import configparser 3 | from sklearn import svm 4 | from db_handler import sites 5 | from typing import List, Dict 6 | 7 | from db_handler import ResearchPaper 8 | from services.apps import ArxivScanner 9 | from services.apps import OpenReviewScanner 10 | from sklearn.feature_extraction.text import TfidfVectorizer 11 | 12 | 13 | config = configparser.ConfigParser() 14 | config.read('db_handler/vault/secrets.ini') 15 | 16 | class ResearchService: 17 | def __init__(self, top_n:int = 3): 18 | self.top_n = top_n 19 | self. 
arxiv = ArxivScanner(sites["arxiv_url"], top_n=top_n) 20 | self.open_review = OpenReviewScanner(top_n=top_n) 21 | self.top_papers = [] 22 | 23 | def _rerank(self, arxiv_papers: List[Dict], open_papers: List[Dict]) -> List[Dict]: 24 | all_papers = arxiv_papers + open_papers 25 | texts = [f"{p['title']} {p['abstract']} {' '.join(p['authors'])}" for p in all_papers] 26 | 27 | vectorizer = TfidfVectorizer( 28 | max_features=5000, 29 | stop_words='english', 30 | ngram_range=(1, 2) 31 | ) 32 | x = vectorizer.fit_transform(texts) 33 | y = np.zeros(len(all_papers)) 34 | for i, paper in enumerate(all_papers): 35 | score = float(paper.get('score', 0)) 36 | citations = float(paper.get('citations', 0)) 37 | y[i] = score + 0.1 * citations 38 | 39 | if y.max() > y.min(): 40 | y = (y - y.min()) / (y.max() - y.min()) 41 | 42 | clf = svm.LinearSVC( 43 | class_weight='balanced', 44 | max_iter=1000, 45 | dual=False 46 | ) 47 | clf.fit(x, y > np.median(y)) 48 | scores = clf.decision_function(x) 49 | scored_papers = [(paper, score) for paper, score in zip(all_papers, scores)] 50 | reranked = sorted(scored_papers, key=lambda x: x[1], reverse=True) 51 | return [paper for paper, _ in reranked[:self.top_n]] 52 | 53 | async def get_latest_papers(self): 54 | search_query = config["Arxiv"]["q"] 55 | arxiv_papers = self.arxiv.get_top_n_papers(search_query=search_query) 56 | open_r_papers = self.open_review.get_top_n_papers() 57 | reranked_papers = self._rerank(arxiv_papers, open_r_papers) 58 | self.top_papers.extend(ResearchPaper( 59 | title = paper["title"], 60 | abstract= paper["abstract"], 61 | authors = paper["authors"], 62 | publication = paper["publication"], 63 | date = paper["_time_str"], 64 | link = paper["url"], 65 | engagement = "") for paper in reranked_papers) 66 | return self.top_papers 67 | -------------------------------------------------------------------------------- /.github/workflows/deploy.yml: -------------------------------------------------------------------------------- 1 | name: Deploy to EC2 2 | 3 | on: 4 | pull_request: 5 | types: [closed] 6 | branches: [ main ] 7 | workflow_dispatch: 8 | 9 | env: 10 | DOCKER_IMAGE_TAG: ${{ github.sha }} 11 | 12 | jobs: 13 | deploy: 14 | runs-on: ubuntu-latest 15 | if: github.event.pull_request.merged == true 16 | 17 | steps: 18 | - name: Checkout Repository 19 | uses: actions/checkout@v4 20 | 21 | - name: Configure AWS Credentials 22 | uses: aws-actions/configure-aws-credentials@v4 23 | with: 24 | aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} 25 | aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} 26 | aws-region: ${{ secrets.AWS_REGION }} 27 | 28 | - name: Setup SSH and Deploy 29 | run: | 30 | # Setup SSH 31 | mkdir -p ~/.ssh 32 | echo "${{ secrets.EC2_SSH_KEY }}" > ~/.ssh/id_rsa 33 | chmod 600 ~/.ssh/id_rsa 34 | ssh-keyscan -H ${{ secrets.EC2_HOST }} >> ~/.ssh/known_hosts 35 | 36 | # Deploy to EC2 37 | ssh ${{ secrets.EC2_USER }}@${{ secrets.EC2_HOST }} " 38 | # Export AWS credentials 39 | export AWS_ACCESS_KEY_ID='${{ secrets.AWS_ACCESS_KEY_ID }}' 40 | export AWS_SECRET_ACCESS_KEY='${{ secrets.AWS_SECRET_ACCESS_KEY }}' 41 | export AWS_REGION='${{ secrets.AWS_REGION }}' 42 | 43 | # Create and setup directories 44 | sudo mkdir -p /data/newsletter/vault 45 | sudo chown -R ${{ secrets.EC2_USER }}:${{ secrets.EC2_USER }} /data/newsletter 46 | 47 | # Sync S3 48 | aws s3 sync s3://${{ secrets.S3_CONFIG_BUCKET }}/vault/ /data/newsletter/vault/ 49 | 50 | # Deploy with Docker 51 | sudo docker build -t ailert-newsletter:${{ env.DOCKER_IMAGE_TAG }} 
https://github.com/${{ github.repository }}.git#${{ github.ref }} 52 | 53 | sudo docker stop ailert-newsletter || true 54 | sudo docker rm ailert-newsletter || true 55 | 56 | sudo docker run -d \ 57 | --name ailert-newsletter \ 58 | -p 5000:5000 \ 59 | -v /data/newsletter/vault:/app/db_handler/vault \ 60 | --restart unless-stopped \ 61 | -e AWS_ACCESS_KEY_ID='${{ secrets.AWS_ACCESS_KEY_ID }}' \ 62 | -e AWS_SECRET_ACCESS_KEY='${{ secrets.AWS_SECRET_ACCESS_KEY }}' \ 63 | -e AWS_REGION='${{ secrets.AWS_REGION }}' \ 64 | -e SMTP_USERNAME='${{ secrets.SMTP_USERNAME }}' \ 65 | -e SMTP_PASSWORD='${{ secrets.SMTP_PASSWORD }}' \ 66 | -e JWT_SECRET='${{ secrets.JWT_SECRET }}' \ 67 | ailert-newsletter:${{ env.DOCKER_IMAGE_TAG }} 68 | 69 | sudo docker system prune -f --volumes 70 | " 71 | 72 | - name: Cleanup 73 | if: always() 74 | run: rm -f ~/.ssh/id_rsa -------------------------------------------------------------------------------- /static/newsletter.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | AiLert Weekly Newsletter 7 | 8 | 9 | 10 | 16 | 17 | 18 | 19 | 26 | 27 | 28 | 34 | 40 | 41 | 42 | 43 | 51 | 57 | 58 | 59 | 60 | 61 | 62 | 63 |
Signup | Follow on X
{{brand_name}}
Your Weekly AI Intelligence Pulse
Dive into this week's most groundbreaking AI developments.
{{content}}
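The template above exposes two placeholders, {{brand_name}} and {{content}}. One way to fill them is plain string substitution, as sketched below; the repository's actual NewsletterBuilder (builder/builder.py, not included in this dump) may do this differently, and render_newsletter is an illustrative name rather than a real API.
```python
# Illustrative only: filling the {{brand_name}} / {{content}} placeholders in
# static/newsletter.html with plain string substitution.
from pathlib import Path

def render_newsletter(content_html: str, brand_name: str = "AiLert") -> str:
    template = Path("static/newsletter.html").read_text(encoding="utf-8")
    return (template
            .replace("{{brand_name}}", brand_name)
            .replace("{{content}}", content_html))
```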
92 | 93 | -------------------------------------------------------------------------------- /services/apps/gh_service.py: -------------------------------------------------------------------------------- 1 | import jwt 2 | import time 3 | import requests 4 | import configparser 5 | from db_handler import Repo 6 | from bs4 import BeautifulSoup 7 | 8 | config = configparser.ConfigParser() 9 | config.read('db_handler/vault/secrets.ini') 10 | 11 | default_pem = config["GitHub"]["pem_path"] 12 | default_clientId = config["GitHub"]["client_id"] 13 | 14 | class GitHubScanner: 15 | def __init__(self, site_url, ftype, top_n=5, pem_path=default_pem, client_id=default_clientId): 16 | self.site_url = site_url 17 | self.ftype = ftype 18 | self.top_n = top_n 19 | self.pem_path = pem_path 20 | self.client_id = client_id 21 | self.response = [] 22 | 23 | def _gh_authenticate(self): 24 | with open(self.pem_path, 'rb') as pem_file: 25 | signing_key = pem_file.read() 26 | 27 | payload = { 28 | 'iat': int(time.time()), 29 | 'exp': int(time.time()) + 600, 30 | 'iss': self.client_id 31 | } 32 | 33 | encoded_jwt = jwt.encode(payload, signing_key, algorithm='RS256') 34 | return encoded_jwt 35 | 36 | def _extract_from_html(self, link): 37 | repos = [] 38 | try: 39 | response = requests.get(link) 40 | response.raise_for_status() 41 | soup = BeautifulSoup(response.text, 'html.parser') 42 | repo_list = soup.find_all('article', class_='Box-row') 43 | 44 | for repo in repo_list: 45 | name = repo.find('h2', class_='h3').text.strip().replace('\n', '').replace(' ', '') 46 | 47 | description = repo.find('p', class_='col-9 color-fg-muted my-1 pr-4') 48 | description = description.text.strip() if description else "No description provided." 49 | 50 | stars_element = repo.find('a', class_='Link Link--muted d-inline-block mr-3') or \ 51 | repo.find('a', class_='Link--muted d-inline-block mr-3') 52 | stars = stars_element.text.strip().replace(',', '') if stars_element else "0" 53 | 54 | fork_elements = repo.find_all('a', class_='Link Link--muted d-inline-block mr-3') or \ 55 | repo.find_all('a', class_='Link--muted d-inline-block mr-3') 56 | forks = fork_elements[1].text.strip().replace(',', '') if len(fork_elements) > 1 else "0" 57 | 58 | repos.append({ 59 | 'name': name, 60 | 'description': description, 61 | 'stars': str(stars), 62 | 'forks': str(forks) 63 | }) 64 | 65 | return repos[:self.top_n] 66 | except Exception as e: 67 | print(f"Error: {str(e)}") 68 | 69 | def _daily_trending_repos(self): 70 | repositories = self._extract_from_html(self.site_url) 71 | return repositories 72 | 73 | def _weekly_trending_repos(self): 74 | repositories = self._extract_from_html(self.site_url) 75 | return repositories 76 | 77 | async def get_trending_repos(self): 78 | if self.ftype == "daily": 79 | repositories = self._daily_trending_repos() 80 | else: 81 | repositories = self._weekly_trending_repos() 82 | self.response.extend(Repo( 83 | name = repo["name"], 84 | link = "", 85 | summary = repo["description"], 86 | source = "GitHub", 87 | engagement = repo["stars"]) for repo in repositories) 88 | return self.response -------------------------------------------------------------------------------- /services/apps/ph_service.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from bs4 import BeautifulSoup 3 | from datetime import datetime, timedelta 4 | 5 | class ProductHuntScanner: 6 | def __init__(self, site_url, graph_url, top_n=5): 7 | self.site_url = site_url 8 | self.graph_url = 
graph_url 9 | self.top_n = top_n 10 | self.response = [] 11 | 12 | def get_last_week_top_products(self): 13 | headers = { 14 | "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36" 15 | } 16 | try: 17 | response = requests.get(self.site_url, headers=headers) 18 | response.raise_for_status() 19 | soup = BeautifulSoup(response.text, "html.parser") 20 | last_week_section = soup.find("section", string="Last Week's Top Products") 21 | if not last_week_section: 22 | print("Could not find 'Last Week's Top Products' section.") 23 | return [] 24 | 25 | products = [] 26 | for product in last_week_section.find_all("li"): 27 | title = product.find("h3").get_text(strip=True) if product.find("h3") else "No Title" 28 | link = product.find("a", href=True)["href"] if product.find("a", href=True) else "No Link" 29 | products.append({"title": title, "link": f"{self.site_url}{link}"}) 30 | 31 | return products 32 | except Exception as e: 33 | print(f"Error fetching data: {e}") 34 | return [] 35 | 36 | def get_last_month_top_products(self, api_key): 37 | query = """ 38 | query ($dateFrom: DateTime!, $dateTo: DateTime!) { 39 | posts(first: 10, postedAfter: $dateFrom, postedBefore: $dateTo, order: VOTES_COUNT) { 40 | edges { 41 | node { 42 | id 43 | name 44 | tagline 45 | url 46 | votesCount 47 | } 48 | } 49 | } 50 | } 51 | """ 52 | today = datetime.utcnow() 53 | first_day_of_this_month = datetime(today.year, today.month, 1) 54 | last_day_of_last_month = first_day_of_this_month - timedelta(days=1) 55 | first_day_of_last_month = datetime(last_day_of_last_month.year, last_day_of_last_month.month, 1) 56 | 57 | variables = { 58 | "dateFrom": first_day_of_last_month.isoformat(), 59 | "dateTo": last_day_of_last_month.isoformat() 60 | } 61 | 62 | # Set headers with API key 63 | headers = { 64 | "Authorization": f"Bearer {api_key}", 65 | "Content-Type": "application/json" 66 | } 67 | 68 | try: 69 | response = requests.post(self.graph_url, json={"query": query, "variables": variables}, headers=headers) 70 | response.raise_for_status() 71 | data = response.json() 72 | 73 | products = data.get("data", {}).get("posts", {}).get("edges", []) 74 | if not products: 75 | print("No products found for last month.") 76 | return [] 77 | 78 | result = [] 79 | for product in products: 80 | node = product["node"] 81 | result.append({ 82 | "title": node["name"], 83 | "summary": node["tagline"], 84 | "link": node["url"], 85 | "engagement": node["votesCount"], 86 | "source": "Product Hunt" 87 | }) 88 | 89 | return result 90 | except Exception as e: 91 | print(f"Error fetching data: {e}") 92 | return [] 93 | -------------------------------------------------------------------------------- /services/crawler/rss_crawler.py: -------------------------------------------------------------------------------- 1 | import pytz 2 | import html 3 | import feedparser 4 | from datetime import datetime 5 | 6 | import requests 7 | import xml.etree.ElementTree as et 8 | from urllib.parse import urlparse 9 | 10 | 11 | def is_rss_feed(url): 12 | try: 13 | parsed_url = urlparse(url) 14 | if not all([parsed_url.scheme, parsed_url.netloc]): 15 | return False 16 | 17 | headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'} 18 | response = requests.get(url, headers=headers, timeout=10) 19 | response.raise_for_status() 20 | 21 | content_type = response.headers.get('content-type', '').lower() 22 | if not any(valid_type in content_type for valid_type in 
['application/rss+xml', 'application/xml', 'text/xml']): 23 | return False 24 | 25 | root = et.fromstring(response.content) 26 | rss_indicators = [ 27 | 'rss', 28 | 'feed', 29 | 'channel', 30 | 'item', 31 | 'entry' 32 | ] 33 | 34 | if root.tag in rss_indicators: 35 | return True 36 | 37 | for child in root: 38 | if child.tag in rss_indicators: 39 | return True 40 | return False 41 | except requests.RequestException: 42 | return False 43 | except et.ParseError: 44 | return False 45 | except Exception: 46 | return False 47 | 48 | def load_feed(self, url): 49 | self.feed_url = url 50 | try: 51 | self.feed_data = feedparser.parse(url) 52 | return len(self.feed_data.entries) > 0 53 | except Exception as e: 54 | print(f"Error loading feed: {e}") 55 | return False 56 | 57 | def get_feed_info(self): 58 | if not self.feed_data: 59 | return None 60 | 61 | return { 62 | 'title': self.feed_data.feed.get('title', 'No title'), 63 | 'description': self.feed_data.feed.get('description', 'No description'), 64 | 'link': self.feed_data.feed.get('link', ''), 65 | 'last_updated': self.feed_data.feed.get('updated', 'No update date') 66 | } 67 | 68 | def get_entries(self, limit=None, sort_by_date=True): 69 | if not self.feed_data: 70 | return [] 71 | 72 | entries = [] 73 | for entry in self.feed_data.entries: 74 | clean_entry = { 75 | 'title': html.unescape(entry.get('title', 'No title')), 76 | 'link': entry.get('link', ''), 77 | 'description': html.unescape(entry.get('description', 'No description')), 78 | 'author': entry.get('author', 'Unknown author'), 79 | 'published': entry.get('published', 'No publication date'), 80 | 'updated': entry.get('updated', entry.get('published', 'No update date')) 81 | } 82 | try: 83 | date = entry.get('updated_parsed', entry.get('published_parsed')) 84 | if date: 85 | clean_entry['timestamp'] = datetime(*date[:6], tzinfo=pytz.UTC) 86 | except (TypeError, ValueError): 87 | clean_entry['timestamp'] = None 88 | 89 | entries.append(clean_entry) 90 | if sort_by_date: 91 | entries.sort(key=lambda x: x['timestamp'] if x['timestamp'] else datetime.min.replace(tzinfo=pytz.UTC), 92 | reverse=True) 93 | if limit: 94 | entries = entries[:limit] 95 | 96 | return entries 97 | 98 | def search_entries(self, keyword, case_sensitive=False): 99 | if not self.feed_data: 100 | return [] 101 | 102 | matches = [] 103 | entries = self.get_entries() 104 | 105 | for entry in entries: 106 | search_text = f"{entry['title']} {entry['description']}" 107 | if not case_sensitive: 108 | search_text = search_text.lower() 109 | keyword = keyword.lower() 110 | 111 | if keyword in search_text: 112 | matches.append(entry) 113 | 114 | return matches 115 | -------------------------------------------------------------------------------- /app/main.py: -------------------------------------------------------------------------------- 1 | import time 2 | import logging 3 | import schedule 4 | import configparser 5 | import pandas as pd 6 | from utils import utility 7 | from typing import Optional 8 | from services import EmailService 9 | from threading import Thread, Event 10 | from db_handler import sites, Dynamo, TaskType 11 | from builder.builder import NewsletterBuilder 12 | 13 | logger = logging.getLogger(__name__) 14 | logging.basicConfig(level=logging.INFO) 15 | 16 | stop_event = Event() 17 | scheduler_thread: Optional[Thread] = None 18 | scheduler_state = {"is_running": False, "is_paused": False, "task_type": None} 19 | 20 | config = configparser.ConfigParser() 21 | config.read('db_handler/vault/secrets.ini') 22 | 
region = config["Dynamo"]["region"] 23 | 24 | dynamo = Dynamo(region) 25 | 26 | df = pd.read_csv("db_handler/vault/recipients.csv") 27 | subscribers = df['email'].tolist() 28 | 29 | def run_scheduler(task_type: str): 30 | if task_type == TaskType.WEEKLY.value: 31 | schedule.every().monday.at("00:00").do(weekly_task) 32 | logging.info("Weekly scheduler started") 33 | else: 34 | schedule.every().day.at("00:00").do(daily_task) 35 | logging.info("Daily scheduler started") 36 | 37 | while not stop_event.is_set(): 38 | if not scheduler_state["is_paused"]: 39 | schedule.run_pending() 40 | time.sleep(1) 41 | 42 | schedule.clear() 43 | scheduler_state["is_running"] = False 44 | logging.info("Scheduler stopped") 45 | 46 | 47 | async def generate_newsletter(sections, task_type): 48 | if task_type == TaskType.WEEKLY.value: 49 | urls = sites["gh_weekly_url"] 50 | else: 51 | urls = sites["gh_daily_url"] 52 | 53 | weekly = NewsletterBuilder({ 54 | "gh_url": urls, 55 | "gh_ftype": task_type}, 56 | dynamo) 57 | weekly.set_sections(sections) 58 | content = await weekly.section_generator() 59 | newsletter_html = await weekly.build(content) 60 | return newsletter_html 61 | 62 | 63 | async def daily_task(): 64 | daily = NewsletterBuilder({ 65 | "gh_url": sites["gh_daily_url"], 66 | "gh_ftype": "daily"}, 67 | dynamo) 68 | daily.set_sections(["news"]) 69 | logger.info(f"starting generator") 70 | content = await daily.section_generator() 71 | logger.info(f"sections generated") 72 | newsletter_html = await daily.build(content) 73 | newsletter_html = utility.inline_css(newsletter_html, "static") 74 | newsletter_html = utility.inline_svg_images(newsletter_html, "static") 75 | logger.info("content updated") 76 | item = save_to_db(newsletter_html, "daily") 77 | logger.info(f"saved to db, sending email") 78 | await send_email(content=item["content"]) 79 | logger.info(f"email sent") 80 | 81 | 82 | async def weekly_task(): 83 | weekly = NewsletterBuilder({ 84 | "gh_url": sites["gh_weekly_url"], 85 | "gh_ftype": "weekly"}, 86 | dynamo) 87 | weekly.set_sections(["all"]) 88 | logger.info(f"starting generator") 89 | content = await weekly.section_generator() 90 | logger.info(f"sections generated") 91 | newsletter_html = await weekly.build(content) 92 | logger.info(f"newsletter build complete") 93 | newsletter_html = utility.inline_css(newsletter_html, "static") 94 | newsletter_html = utility.inline_svg_images(newsletter_html, "static") 95 | logger.info("content updated") 96 | item = save_to_db(newsletter_html, "weekly") 97 | logger.info(f"saved to db, sending email") 98 | await send_email(content=item["content"]) 99 | logger.info(f"email sent") 100 | 101 | 102 | def save_to_db(content, content_type): 103 | try: 104 | item = { 105 | "item_name": "newsletter", 106 | "type": content_type, 107 | "content": content, 108 | "created": utility.get_formatted_timestamp() 109 | } 110 | 111 | item_id = utility.generate_deterministic_id(item, key_fields=["item_name", "type"], prefix="nl") 112 | item["newsletterId"] = item_id 113 | dynamo.add_item("newsletter", "newsletterId", item, False) 114 | return item 115 | except Exception as e: 116 | logging.info("Error saving to dynamo db", e) 117 | 118 | 119 | async def send_email(content=None, template_id=None, recipients=subscribers): 120 | email_service = EmailService( 121 | recipients=recipients, 122 | body_text = content, 123 | template_id=template_id 124 | ) 125 | result = email_service.send_email() 126 | return result 127 | 
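app/main.py above defines the scheduler loop (run_scheduler) and the shared stop_event / scheduler_state globals, but the code that actually starts and stops the background thread lives in router/routes.py, which is not reproduced here. A minimal sketch of that wiring, assuming it runs inside app/main.py's namespace — start_scheduler and stop_scheduler are illustrative names, not the repo's real endpoints.
```python
# Illustrative wiring only -- relies on app/main.py's module globals
# (Thread, stop_event, scheduler_state, scheduler_thread, run_scheduler).
def start_scheduler(task_type: str) -> bool:
    """Launch run_scheduler() in a daemon thread unless it is already running."""
    global scheduler_thread
    if scheduler_state["is_running"]:
        return False
    stop_event.clear()
    scheduler_state.update({"is_running": True, "is_paused": False, "task_type": task_type})
    scheduler_thread = Thread(target=run_scheduler, args=(task_type,), daemon=True)
    scheduler_thread.start()
    return True

def stop_scheduler() -> None:
    """Signal the loop in run_scheduler() to exit; it clears the schedule itself."""
    stop_event.set()
```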
-------------------------------------------------------------------------------- /services/email_service.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import configparser 3 | from typing import List, Optional 4 | from sendgrid import SendGridAPIClient 5 | from sendgrid.helpers.mail import Mail, Content 6 | 7 | 8 | config = configparser.ConfigParser() 9 | config.read('db_handler/vault/secrets.ini') 10 | api_key = config["Sendgrid"]["api_key"] 11 | 12 | class EmailService: 13 | def __init__(self, recipients: Optional[List[str]] = None, 14 | subject: Optional[str] = None, 15 | body_text: Optional[str] = None, 16 | template_id: Optional[str] = None): 17 | self.sender = "weekly@ailert.tech" 18 | self.recipients = recipients if recipients else [] 19 | self.subject = subject if subject else "Weekly Newsletter" 20 | self.charset = "UTF-8" 21 | self.body_text = body_text 22 | self.template_id = template_id 23 | 24 | # Initialize SendGrid client 25 | try: 26 | self.sg_client = SendGridAPIClient(api_key=api_key) 27 | except Exception as e: 28 | logging.error(f"Failed to initialize SendGrid client: {str(e)}") 29 | raise 30 | 31 | def _create_mail_object(self, recipient: str) -> Mail: 32 | """Create a Mail object for a single recipient""" 33 | from_email = self.sender 34 | to_email = recipient 35 | 36 | mail = Mail( 37 | from_email=from_email, 38 | to_emails=to_email, 39 | subject=self.subject, 40 | html_content=self.body_text 41 | ) 42 | 43 | # if self.template_id: 44 | # mail.template_id = self.template_id 45 | # else: 46 | # content = Content("text/html", self.body_text) 47 | # mail.content = [content] 48 | 49 | return mail 50 | 51 | def send_email(self) -> dict: 52 | """ 53 | Send emails to all recipients using SendGrid 54 | Returns: 55 | dict: Status of email sending operation 56 | """ 57 | if not self.recipients: 58 | return { 59 | "status": "error", 60 | "message": "No recipients specified", 61 | "failed_recipients": [] 62 | } 63 | 64 | failed_recipients = [] 65 | successful_count = 0 66 | 67 | for recipient in self.recipients: 68 | try: 69 | mail = self._create_mail_object(recipient) 70 | response = self.sg_client.send(mail) 71 | 72 | if response.status_code in [200, 201, 202]: 73 | successful_count += 1 74 | logging.info(f"Email sent successfully to {recipient}") 75 | else: 76 | failed_recipients.append({ 77 | "email": recipient, 78 | "error": f"SendGrid API returned status code: {response.status_code}" 79 | }) 80 | logging.error(f"Failed to send email to {recipient}. 
Status code: {response.status_code}") 81 | 82 | except Exception as e: 83 | failed_recipients.append({ 84 | "email": recipient, 85 | "error": str(e) 86 | }) 87 | logging.error(f"Exception while sending email to {recipient}: {str(e)}") 88 | 89 | status = "success" if not failed_recipients else "partial_success" if successful_count else "error" 90 | 91 | return { 92 | "status": status, 93 | "message": f"Successfully sent {successful_count} out of {len(self.recipients)} emails", 94 | "failed_recipients": failed_recipients 95 | } 96 | 97 | def add_recipient(self, recipient: str) -> None: 98 | """Add a single recipient to the email list""" 99 | if recipient not in self.recipients: 100 | self.recipients.append(recipient) 101 | 102 | def add_recipients(self, recipients: List[str]) -> None: 103 | """Add multiple recipients to the email list""" 104 | for recipient in recipients: 105 | self.add_recipient(recipient) 106 | 107 | def set_template_id(self, template_id: str) -> None: 108 | """Set the SendGrid template ID""" 109 | self.template_id = template_id 110 | 111 | def set_body_text(self, body_text: str) -> None: 112 | """Set the email body text""" 113 | self.body_text = body_text 114 | 115 | def set_subject(self, subject: str) -> None: 116 | """Set the email subject""" 117 | self.subject = subject -------------------------------------------------------------------------------- /static/style.css: -------------------------------------------------------------------------------- 1 | /* Base Styles */ 2 | body { 3 | font-family: 'Segoe UI', -apple-system, BlinkMacSystemFont, sans-serif; 4 | line-height: 1.4; 5 | margin: 0; 6 | padding: 0; 7 | background-color: #f0f2f5; 8 | color: #2d3748; 9 | } 10 | 11 | .container { 12 | max-width: 600px; 13 | margin: 0 auto; 14 | background-color: white; 15 | box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1); 16 | } 17 | 18 | /* Top Navigation */ 19 | .top-nav { 20 | padding: 4px 0; 21 | text-align: right; 22 | margin-right: 12px; 23 | margin-bottom: 2px; 24 | } 25 | 26 | .top-nav a { 27 | color: #2c3e50; 28 | text-decoration: none; 29 | padding: 3px 8px; 30 | margin: 0 2px; 31 | font-size: 10px; 32 | border-radius: 12px; 33 | border: 1px solid #e5e7eb; 34 | } 35 | 36 | /* Header Styles */ 37 | .header { 38 | background: linear-gradient(135deg, #6366f1 0%, #8b5cf6 100%); 39 | color: white; 40 | padding: 25px; 41 | border-radius: 12px; 42 | margin: 4px 12px 12px 12px; 43 | } 44 | 45 | .header-top { 46 | display: flex; 47 | align-items: center; 48 | margin-bottom: 8px; 49 | } 50 | 51 | .header-content { 52 | position: relative; 53 | z-index: 1; 54 | padding-right: 60px; 55 | } 56 | 57 | .logo { 58 | color: #ffffff; 59 | background: rgba(255, 255, 255); 60 | width: 40px; 61 | height: 40px; 62 | border-radius: 8px; 63 | margin-right: 15px; 64 | } 65 | 66 | .brand-name { 67 | font-size: 24px; 68 | font-weight: 700; 69 | letter-spacing: -0.5px; 70 | } 71 | 72 | .header h2 { 73 | font-size: 24px; 74 | font-weight: 400; 75 | margin: 0 0 12px 0; 76 | line-height: 1.3; 77 | } 78 | 79 | .header p { 80 | font-size: 16px; 81 | line-height: 1.4; 82 | margin: 0; 83 | opacity: 0.9; 84 | } 85 | 86 | /* Content Sections */ 87 | .section { 88 | margin: 12px; 89 | padding: 16px; 90 | border-radius: 8px; 91 | background: white; 92 | box-shadow: 0 1px 2px rgba(0, 0, 0, 0.04); 93 | } 94 | 95 | .summary-section { 96 | background: linear-gradient(135deg, #e0e7ff 0%, #f0f7ff 100%); 97 | } 98 | 99 | .section-title { 100 | color: #4f46e5; 101 | font-size: 18px; 102 | font-weight: 700; 103 | 
margin-bottom: 12px; 104 | padding-bottom: 8px; 105 | border-bottom: 1px solid #b0b8e6; 106 | } 107 | 108 | /* News Items */ 109 | .news-item { 110 | padding: 12px; 111 | margin-bottom: 12px; 112 | background: white; 113 | border-radius: 6px; 114 | border: 1px solid #e5e7eb; 115 | } 116 | 117 | .news-title { 118 | color: #4338ca; 119 | font-size: 16px; 120 | font-weight: 600; 121 | margin-bottom: 6px; 122 | } 123 | 124 | .news-item p { 125 | margin: 0 0 8px 0; 126 | font-size: 14px; 127 | line-height: 1.4; 128 | } 129 | 130 | /* Trending Button */ 131 | .trending-button { 132 | display: inline-flex; 133 | align-items: center; 134 | background: linear-gradient(135deg, #f0f7ff 0%, #e0e7ff 100%); 135 | padding: 4px 10px; 136 | border-radius: 12px; 137 | font-size: 12px; 138 | color: #4338ca; 139 | margin-top: 8px; 140 | } 141 | 142 | .trending-button i { 143 | margin-right: 6px; 144 | color: #6366f1; 145 | } 146 | 147 | /* Share Section */ 148 | .share-section { 149 | /* background: linear-gradient(135deg, #818cf8 0%, #6366f1 100%);*/ 150 | background: linear-gradient(135deg, #f0f7ff 0%, #e0e7ff 100%); 151 | color: black; 152 | text-align: center; 153 | padding: 20px 16px; 154 | } 155 | 156 | .share-button { 157 | padding: 8px 16px; 158 | font-size: 13px; 159 | border-radius: 16px; 160 | margin: 6px; 161 | } 162 | 163 | /* Feedback Section */ 164 | .feedback-section { 165 | background: linear-gradient(135deg, #f0f7ff 0%, #e0e7ff 100%); 166 | text-align: center; 167 | padding: 20px 16px; 168 | } 169 | 170 | .feedback-button { 171 | padding: 8px 16px; 172 | border-radius: 16px; 173 | font-size: 13px; 174 | margin: 0 6px; 175 | } 176 | 177 | /* Read Time */ 178 | .read-time { 179 | display: inline-flex; 180 | align-items: center; 181 | padding: 4px 10px; 182 | border-radius: 12px; 183 | margin-top: 8px; 184 | font-size: 12px; 185 | } 186 | 187 | /* Footer */ 188 | .footer { 189 | background: linear-gradient(135deg, #4338ca 0%, #3730a3 100%); 190 | color: white; 191 | padding: 20px 16px; 192 | text-align: center; 193 | font-size: 12px; 194 | } 195 | 196 | .footer a { 197 | padding: 0 8px; 198 | } 199 | 200 | /* Responsive Design */ 201 | @media (max-width: 600px) { 202 | .section { 203 | margin: 8px; 204 | padding: 12px; 205 | } 206 | 207 | .header { 208 | padding: 20px; 209 | margin: 4px 8px 8px 8px; 210 | } 211 | 212 | .header h2 { 213 | font-size: 20px; 214 | } 215 | 216 | .news-item { 217 | padding: 10px; 218 | margin-bottom: 10px; 219 | } 220 | } -------------------------------------------------------------------------------- /db_handler/db.py: -------------------------------------------------------------------------------- 1 | """ 2 | Database support functions. 3 | The idea is that none of the individual scripts deal directly with the file system. 4 | Any of the file system I/O and the associated settings are in this single file. 5 | """ 6 | 7 | import os 8 | import sqlite3, zlib, pickle, tempfile 9 | from sqlitedict import SqliteDict 10 | from contextlib import contextmanager 11 | 12 | 13 | DATA_DIR = 'data' 14 | 15 | @contextmanager 16 | def _tempfile(*args, **kws): 17 | """ Context for temporary file. 
18 | Will find a free temporary filename upon entering 19 | and will try to delete the file on leaving 20 | Parameters 21 | ---------- 22 | suffix : string 23 | optional file suffix 24 | """ 25 | 26 | fd, name = tempfile.mkstemp(*args, **kws) 27 | os.close(fd) 28 | try: 29 | yield name 30 | finally: 31 | try: 32 | os.remove(name) 33 | except OSError as e: 34 | if e.errno == 2: 35 | pass 36 | else: 37 | raise e 38 | 39 | 40 | @contextmanager 41 | def open_atomic(filepath, *args, **kwargs): 42 | """ Open temporary file object that atomically moves to destination upon 43 | exiting. 44 | Allows reading and writing to and from the same filename. 45 | Parameters 46 | ---------- 47 | filepath : string 48 | the file path to be opened 49 | fsync : bool 50 | whether to force write the file to disk 51 | kwargs : mixed 52 | Any valid keyword arguments for :code:`open` 53 | """ 54 | fsync = kwargs.pop('fsync', False) 55 | 56 | with _tempfile(dir=os.path.dirname(filepath)) as tmppath: 57 | with open(tmppath, *args, **kwargs) as f: 58 | yield f 59 | if fsync: 60 | f.flush() 61 | os.fsync(f.fileno()) 62 | os.rename(tmppath, filepath) 63 | 64 | def safe_pickle_dump(obj, fname): 65 | """ 66 | prevents a case where one process could be writing a pickle file 67 | while another process is reading it, causing a crash. the solution 68 | is to write the pickle file to a temporary file and then move it. 69 | """ 70 | with open_atomic(fname, 'wb') as f: 71 | pickle.dump(obj, f, -1) # -1 specifies highest binary protocol 72 | 73 | # ----------------------------------------------------------------------------- 74 | 75 | class CompressedSqliteDict(SqliteDict): 76 | """ overrides the encode/decode methods to use zlib, so we get compressed storage """ 77 | 78 | def __init__(self, *args, **kwargs): 79 | 80 | def encode(obj): 81 | return sqlite3.Binary(zlib.compress(pickle.dumps(obj, pickle.HIGHEST_PROTOCOL))) 82 | 83 | def decode(obj): 84 | return pickle.loads(zlib.decompress(bytes(obj))) 85 | 86 | super().__init__(*args, **kwargs, encode=encode, decode=decode) 87 | 88 | # ----------------------------------------------------------------------------- 89 | """ 90 | some docs to self: 91 | flag='c': default mode, open for read/write, and creating the db/table if necessary 92 | flag='r': open for read-only 93 | """ 94 | 95 | # stores info about papers, and also their lighter-weight metadata 96 | PAPERS_DB_FILE = os.path.join(DATA_DIR, 'papers.db') 97 | # stores account-relevant info, like which tags exist for which papers 98 | DICT_DB_FILE = os.path.join(DATA_DIR, 'dict.db') 99 | 100 | def get_papers_db(flag='r', autocommit=True): 101 | assert flag in ['r', 'c'] 102 | pdb = CompressedSqliteDict(PAPERS_DB_FILE, tablename='papers', flag=flag, autocommit=autocommit) 103 | return pdb 104 | 105 | def get_metas_db(flag='r', autocommit=True): 106 | assert flag in ['r', 'c'] 107 | mdb = SqliteDict(PAPERS_DB_FILE, tablename='metas', flag=flag, autocommit=autocommit) 108 | return mdb 109 | 110 | def get_tags_db(flag='r', autocommit=True): 111 | assert flag in ['r', 'c'] 112 | tdb = CompressedSqliteDict(DICT_DB_FILE, tablename='tags', flag=flag, autocommit=autocommit) 113 | return tdb 114 | 115 | def get_last_active_db(flag='r', autocommit=True): 116 | assert flag in ['r', 'c'] 117 | ladb = SqliteDict(DICT_DB_FILE, tablename='last_active', flag=flag, autocommit=autocommit) 118 | return ladb 119 | 120 | def get_email_db(flag='r', autocommit=True): 121 | assert flag in ['r', 'c'] 122 | edb = SqliteDict(DICT_DB_FILE, tablename='email', 
flag=flag, autocommit=autocommit) 123 | return edb 124 | 125 | # ----------------------------------------------------------------------------- 126 | """ 127 | our "feature store" is currently just a pickle file, may want to consider hdf5 in the future 128 | """ 129 | 130 | # stores tfidf features a bunch of other metadata 131 | FEATURES_FILE = os.path.join(DATA_DIR, 'features.p') 132 | 133 | def save_features(features): 134 | """ takes the features dict and save it to disk in a simple pickle file """ 135 | safe_pickle_dump(features, FEATURES_FILE) 136 | 137 | def load_features(): 138 | """ loads the features dict from disk """ 139 | with open(FEATURES_FILE, 'rb') as f: 140 | features = pickle.load(f) 141 | return features 142 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # AiLert ![logo.svg](static/logo.svg) 2 | 3 | An open-source AI newsletter platform that aggregates and curates AI content from across the internet. 4 | 5 | ## Overview 6 | AiLert automatically aggregates content from 150+ sources including research papers, news sites, GitHub repositories, and events to create customizable AI newsletters. Built with Python and powered by AWS, it helps communities and teams stay updated with the latest in AI. 7 | 8 | ## Features 9 | - 📚 Multi-source aggregation (150+ sources) 10 | - 🎯 Smart content categorization 11 | - 📊 Engagement tracking 12 | - ⚡ Async content processing 13 | - 📧 Customizable newsletter templates 14 | - 📅 Daily and weekly digest options 15 | 16 | ## Content Sources 17 | - Research Papers (arXiv) 18 | - Industry News (RSS feeds) 19 | - GitHub Trending Repositories 20 | - AI Competitions & Events 21 | - Product Launches 22 | - Technical Blogs 23 | 24 | ## Tech Stack 25 | - Python 3.8+ 26 | - Flask 27 | - AWS DynamoDB 28 | - BeautifulSoup4 29 | - Feedparser 30 | - Schedule 31 | - Pydantic 32 | - uvicorn 33 | 34 | ## 📫 How to Subscribe 35 | 36 | 1. Visit https://ailert.tech 37 | 2. Navigate to the newsletter section 38 | 3. Enter your email address 39 | 4. Confirm your subscription 40 | 41 | ## ✨ What Our Readers Say 42 | 43 | `"AIlert's newsletter helps me stay on top of AI developments without getting overwhelmed" - Tech Lead at Fortune 500` 44 | 45 | 46 | `"The perfect blend of technical depth and practical insights" - AI Researcher` 47 | 48 | ## 🔒 Your Privacy Matters 49 | 50 | - No spam, ever 51 | - Unsubscribe anytime 52 | - Your data is never shared or sold 53 | 54 | ## 📅 Publication Schedule 55 | Receive our carefully curated insights every week, delivered straight to your inbox. 56 | 57 | ## Installation 58 | 59 | 1. Clone the repository: 60 | ```bash 61 | git clone https://github.com/yourusername/ailert.git 62 | cd ailert 63 | ``` 64 | 65 | 2. Install dependencies: 66 | ```bash 67 | pip install -r requirements.txt 68 | ``` 69 | 70 | 3. Set up AWS credentials: 71 | ```bash 72 | export AWS_ACCESS_KEY_ID="your_access_key" 73 | export AWS_SECRET_ACCESS_KEY="your_secret_key" 74 | export AWS_REGION="your_region" 75 | ``` 76 | 77 | 4. 
Run the application: 78 | ```bash 79 | python main.py 80 | ``` 81 | 82 | ## Project Structure 83 | ``` 84 | ailert/ 85 | ├── builder/ # Newsletter generation 86 | ├── db_handler/ # Db operations manager 87 | ├── app/ # Core functions of the application 88 | ├── router/ # REST Api routes 89 | ├── services/ # Content aggregation services 90 | ├── static/ # Templates and assets 91 | ├── utils/ # Application common utilities 92 | ├── main.py # Flask application 93 | └── requirements.txt # Dependencies 94 | ``` 95 | 96 | ## Contributing 97 | We welcome contributions of all kinds! Here are some ways you can help: 98 | 99 | ### Development 100 | - Add new content sources 101 | - Improve content categorization 102 | - Optimize performance 103 | - Add new features 104 | - Fix bugs 105 | - Write tests 106 | 107 | ### Documentation 108 | - Improve technical docs 109 | - Write tutorials 110 | - Add code comments 111 | - Create examples 112 | 113 | ### Design 114 | - Improve newsletter templates 115 | - Create visual assets 116 | - Enhance UI/UX 117 | 118 | ### Content 119 | - Add new RSS feeds 120 | - Improve content filtering 121 | - Suggest new features 122 | 123 | ## Getting Started with Contributing 124 | 125 | 1. Fork the repository 126 | 2. Create a new branch 127 | ```bash 128 | git checkout -b feature/your-feature 129 | ``` 130 | 3. Make your changes 131 | 4. Write or update tests 132 | 5. Submit a pull request 133 | 134 | ### Development Setup 135 | 1. Install development dependencies: 136 | ```bash 137 | pip install -r requirements-dev.txt 138 | ``` 139 | 140 | 2. Run tests: 141 | ```bash 142 | python -m pytest 143 | ``` 144 | 145 | ## API Documentation 146 | 147 | ### Newsletter Builder 148 | ```python 149 | from builder.builder import NewsletterBuilder 150 | 151 | # Create daily newsletter 152 | daily = NewsletterBuilder({ 153 | "gh_url": "github_url", 154 | "gh_ftype": "daily" 155 | }) 156 | daily.set_sections(["news"]) 157 | content = await daily.section_generator() 158 | ``` 159 | 160 | ### Content Services 161 | Each service handles different content types: 162 | - `NewsService`: Industry news 163 | - `ResearchService`: Research papers 164 | - `GitHubScanner`: Trending repositories 165 | - `ProductService`: New AI products 166 | - `CompetitionService`: AI competitions 167 | - `EventsService`: Upcoming events 168 | 169 | ## License 170 | This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. 
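As a complement to the API Documentation section above, here is a hedged sketch of driving one of the content services on its own. It assumes the Kaggle credential path has already been set in db_handler/vault/secrets.ini; the field names follow the Competitions model in db_handler/models.py.
```python
import asyncio
from services import CompetitionService

async def main():
    # CompetitionService wraps the KaggleScanner; it expects the Kaggle
    # credential path to be configured in db_handler/vault/secrets.ini.
    service = CompetitionService()
    competitions = await service.get_latest_competitions()
    for comp in competitions:
        print(comp.name, comp.deadline, comp.reward)

asyncio.run(main())
```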
171 | 172 | ## Acknowledgments 173 | - All our amazing contributors 174 | - The open-source community 175 | - RSS feed providers 176 | - Content creators 177 | 178 | ## Contact 179 | - Create an issue for bug reports 180 | - Start a discussion for feature requests 181 | - Join our Discord community [link] 182 | 183 | ## Roadmap 184 | - [ ] Add more content sources 185 | - [ ] Implement ML-based content ranking 186 | - [ ] Add personalization options 187 | - [ ] Create API endpoints 188 | - [ ] Add email delivery system 189 | - [ ] Improve template customization 190 | 191 | --- 192 | Built with ❤️ for the AI community -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our 6 | community a harassment-free experience for everyone, regardless of age, body 7 | size, visible or invisible disability, ethnicity, sex characteristics, gender 8 | identity and expression, level of experience, education, socio-economic status, 9 | nationality, personal appearance, race, religion, or sexual identity 10 | and orientation. 11 | 12 | We pledge to act and interact in ways that contribute to an open, welcoming, 13 | diverse, inclusive, and healthy community. 14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | * Focusing on what is best not just for us as individuals, but for the 26 | overall community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | * The use of sexualized language or imagery, and sexual attention or 31 | advances of any kind 32 | * Trolling, insulting or derogatory comments, and personal or political attacks 33 | * Public or private harassment 34 | * Publishing others' private information, such as a physical or email 35 | address, without their explicit permission 36 | * Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing our standards of 42 | acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies when 54 | an individual is officially representing the community in public spaces. 55 | Examples of representing our community include using an official e-mail address, 56 | posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 
58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders responsible for enforcement. 63 | All complaints will be reviewed and investigated promptly and fairly. 64 | 65 | All community leaders are obligated to respect the privacy and security of the 66 | reporter of any incident. 67 | 68 | ## Enforcement Guidelines 69 | 70 | Community leaders will follow these Community Impact Guidelines in determining 71 | the consequences for any action they deem in violation of this Code of Conduct: 72 | 73 | ### 1. Correction 74 | 75 | **Community Impact**: Use of inappropriate language or other behavior deemed 76 | unprofessional or unwelcome in the community. 77 | 78 | **Consequence**: A private, written warning from community leaders, providing 79 | clarity around the nature of the violation and an explanation of why the 80 | behavior was inappropriate. A public apology may be requested. 81 | 82 | ### 2. Warning 83 | 84 | **Community Impact**: A violation through a single incident or series 85 | of actions. 86 | 87 | **Consequence**: A warning with consequences for continued behavior. No 88 | interaction with the people involved, including unsolicited interaction with 89 | those enforcing the Code of Conduct, for a specified period of time. This 90 | includes avoiding interactions in community spaces as well as external channels 91 | like social media. Violating these terms may lead to a temporary or 92 | permanent ban. 93 | 94 | ### 3. Temporary Ban 95 | 96 | **Community Impact**: A serious violation of community standards, including 97 | sustained inappropriate behavior. 98 | 99 | **Consequence**: A temporary ban from any sort of interaction or public 100 | communication with the community for a specified period of time. No public or 101 | private interaction with the people involved, including unsolicited interaction 102 | with those enforcing the Code of Conduct, is allowed during this period. 103 | Violating these terms may lead to a permanent ban. 104 | 105 | ### 4. Permanent Ban 106 | 107 | **Community Impact**: Demonstrating a pattern of violation of community 108 | standards, including sustained inappropriate behavior, harassment of an 109 | individual, or aggression toward or disparagement of classes of individuals. 110 | 111 | **Consequence**: A permanent ban from any sort of public interaction within 112 | the community. 113 | 114 | ## Attribution 115 | 116 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 117 | version 2.0, available at 118 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. 119 | 120 | Community Impact Guidelines were inspired by [Mozilla's code of conduct 121 | enforcement ladder](https://github.com/mozilla/diversity). 122 | 123 | [homepage]: https://www.contributor-covenant.org 124 | 125 | For answers to common questions about this code of conduct, see the FAQ at 126 | https://www.contributor-covenant.org/faq. Translations are available at 127 | https://www.contributor-covenant.org/translations. 
128 | -------------------------------------------------------------------------------- /utils/utility.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import csv 4 | import hashlib 5 | import logging 6 | from pathlib import Path 7 | from datetime import datetime 8 | from typing import Any, Dict, List, Optional 9 | 10 | 11 | def load_template(template_path="static/newsletter.html") -> str: 12 | with open(template_path, 'r') as f: 13 | return f.read() 14 | 15 | def generate_deterministic_id(item: Dict[str, Any], key_fields: List[str], prefix: str = "item") -> str: 16 | """ 17 | Example: 18 | item = { 19 | "product_name": "Widget", 20 | "color": "blue", 21 | "timestamp": "2024-01-01" 22 | } 23 | id = generate_deterministic_id( 24 | item, 25 | key_fields=["product_name", "color"], 26 | prefix="prod" 27 | ) 28 | # Result: prod-a1b2c3d4... 29 | """ 30 | key_fields.sort() 31 | values = [] 32 | for field in key_fields: 33 | if field not in item: 34 | raise KeyError(f"Required field '{field}' not found in item") 35 | value = item[field] 36 | values.append(str(value)) 37 | 38 | combined_string = "||".join(values) 39 | hash_object = hashlib.sha256(combined_string.encode()) 40 | hash_hex = hash_object.hexdigest() 41 | short_hash = hash_hex[:12] 42 | return f"{prefix}-{short_hash}" 43 | 44 | def truncate_text(text: str, max_length: int = 200) -> str: 45 | """Truncate text to specified length at the nearest word boundary.""" 46 | if len(text) <= max_length: 47 | return text 48 | truncated = text[:max_length].rsplit(' ', 1)[0] 49 | return truncated.rstrip('.,!?:;') 50 | 51 | def get_formatted_timestamp(): 52 | """Get current timestamp in YYYY-MM-DD format""" 53 | return datetime.now().strftime("%Y-%m-%d") 54 | 55 | 56 | def is_valid_email(email): 57 | """Validate email format""" 58 | pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$' 59 | return re.match(pattern, email) is not None 60 | 61 | 62 | def save_to_csv(email): 63 | csv_file = 'db_handler/vault/recipients.csv' 64 | file_exists = os.path.exists(csv_file) 65 | 66 | try: 67 | with open(csv_file, 'a', newline='') as file: 68 | writer = csv.writer(file) 69 | if not file_exists: 70 | writer.writerow(['email', 'subscribed_at']) 71 | writer.writerow([email, get_formatted_timestamp()]) 72 | return True 73 | except Exception as e: 74 | logging.error(f"Error saving to CSV: {str(e)}") 75 | return False 76 | 77 | 78 | def is_email_subscribed(email): 79 | """Check if email already exists in CSV""" 80 | csv_file = 'db_handler/vault/recipients.csv' 81 | if not os.path.exists(csv_file): 82 | return False 83 | 84 | try: 85 | with open(csv_file, 'r') as file: 86 | reader = csv.reader(file) 87 | next(reader) # Skip header 88 | return any(row[0] == email for row in reader) 89 | except Exception as e: 90 | logging.error(f"Error checking subscription: {str(e)}") 91 | return False 92 | 93 | 94 | def inline_css(html_content: str, css_path: Optional[str] = None) -> str: 95 | """Replace CSS link tags with the actual CSS content in the HTML string.""" 96 | css_link_pattern = r']+rel="stylesheet"[^>]+href="([^"]+)"[^>]*>' 97 | 98 | def replace_css_link(match): 99 | css_file = match.group(1) 100 | 101 | # If css_path is provided, use it, otherwise look in current directory 102 | if css_path: 103 | css_file_path = Path(css_path) / Path(css_file).name 104 | else: 105 | css_file_path = Path(css_file) 106 | 107 | try: 108 | with open(css_file_path, 'r', encoding='utf-8') as f: 109 | css_content = 
f.read() 110 | return f'' 111 | except FileNotFoundError: 112 | print(f"Warning: CSS file not found: {css_file_path}") 113 | return match.group(0) # Keep original link tag if file not found 114 | except Exception as e: 115 | print(f"Error reading CSS file: {e}") 116 | return match.group(0) 117 | 118 | # Replace all CSS link tags with style tags 119 | return re.sub(css_link_pattern, replace_css_link, html_content) 120 | 121 | 122 | def inline_svg_images(html_content: str, svg_path: Optional[str] = None) -> str: 123 | """Replace SVG image tags with the actual SVG content in the HTML string.""" 124 | img_pattern = r']+src="([^"]+\.svg)"[^>]*>' 125 | 126 | def replace_img_tag(match): 127 | # Get the full img tag and the src value 128 | img_tag = match.group(0) 129 | svg_file = match.group(1) 130 | 131 | # Extract the class and alt attributes if they exist 132 | class_match = re.search(r'class="([^"]+)"', img_tag) 133 | alt_match = re.search(r'alt="([^"]+)"', img_tag) 134 | 135 | class_attr = f' class="{class_match.group(1)}"' if class_match else '' 136 | alt_attr = f' aria-label="{alt_match.group(1)}"' if alt_match else '' 137 | 138 | # If svg_path is provided, use it, otherwise look in current directory 139 | if svg_path: 140 | svg_file_path = Path(svg_path) / Path(svg_file).name 141 | else: 142 | svg_file_path = Path(svg_file) 143 | 144 | try: 145 | with open(svg_file_path, 'r', encoding='utf-8') as f: 146 | svg_content = f.read() 147 | svg_content = svg_content.replace(' str: 32 | if not text: 33 | return '' 34 | soup = BeautifulSoup(text, 'html.parser') 35 | return soup.get_text().strip() 36 | 37 | def _parse_date(self, date_str: str) -> datetime: 38 | try: 39 | parsed_date = parsedate_to_datetime(date_str) 40 | return parsed_date.replace(tzinfo=pytz.UTC) 41 | except: 42 | return datetime.min.replace(tzinfo=pytz.UTC) 43 | 44 | def _fetch_feed(self, url: str) -> List[Dict]: 45 | try: 46 | feed = feedparser.parse(url) 47 | news_items = [] 48 | 49 | for entry in feed.entries: 50 | description = entry.get('description', '') 51 | if not description and 'content' in entry: 52 | description = entry.content[0].value 53 | 54 | additional_info = { 55 | 'published_date': self._parse_date(entry.get('published', '')), 56 | 'author': entry.get('author', None), 57 | 'categories': entry.get('tags', []), 58 | 'guid': entry.get('id', None) 59 | } 60 | 61 | item = { 62 | 'title': entry.get('title', ''), 63 | 'description': self._clean_html(description), 64 | 'link': entry.get('link', ''), 65 | 'source': feed.feed.get('title', 'Unknown Source'), 66 | 'engagement': None, # Can be updated if engagement metrics are available 67 | 'additional_info': additional_info, 68 | 'full_text': f"{entry.get('title', '')} {self._clean_html(description)}" # for ranking 69 | } 70 | 71 | news_items.append(item) 72 | 73 | return news_items 74 | except Exception as e: 75 | print(f"Error fetching feed {url}: {str(e)}") 76 | return [] 77 | 78 | def _calculate_importance_scores(self, news_items: List[Dict]) -> List[float]: 79 | if not news_items: 80 | return [] 81 | try: 82 | texts = [item['full_text'] for item in news_items] 83 | x = self.tfidf.fit_transform(texts) 84 | doc_lengths = x.sum(axis=1).A1 85 | term_importance = np.sqrt(np.asarray(x.mean(axis=0)).ravel()) 86 | scores = doc_lengths * np.dot(x.toarray(), term_importance) 87 | if len(scores) > 0: 88 | scores = (scores - scores.min()) / (scores.max() - scores.min() + 1e-8) 89 | return scores.tolist() 90 | except Exception as e: 91 | logger.error(f"Error calculating 
importance scores: {str(e)}") 92 | raise RuntimeError(f"Failed to calculate importance scores: {str(e)}") 93 | 94 | def _calculate_read_time(self, text: str, words_per_minute: int = 200) -> int: 95 | words = len(text.strip().split()) 96 | total_minutes = words / words_per_minute 97 | minutes = int(total_minutes) 98 | seconds = int((total_minutes - minutes) * 60) 99 | return minutes 100 | 101 | async def get_highlights(self, max_items: int = 5) -> List[NewsItem]: 102 | today = datetime.now(pytz.UTC) 103 | all_news = [] 104 | with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor: 105 | future_to_url = { 106 | executor.submit(self._fetch_feed, url): url 107 | for url in self.rss_urls 108 | } 109 | 110 | for future in concurrent.futures.as_completed(future_to_url): 111 | news_items = future.result() 112 | all_news.extend(news_items) 113 | 114 | today_news = [ 115 | item for item in all_news 116 | if item['additional_info']['published_date'].date() == today.date() 117 | ] 118 | 119 | if not today_news: 120 | return [] 121 | 122 | importance_scores = self._calculate_importance_scores(today_news) 123 | 124 | for item, score in zip(today_news, importance_scores): 125 | item['additional_info']['importance_score'] = float(score) 126 | 127 | if len(today_news) > 1: 128 | sorted_news = sorted( 129 | today_news, 130 | key=lambda x: ( 131 | x['additional_info']['importance_score'], 132 | x['additional_info']['published_date'] 133 | ), 134 | reverse=True 135 | ) 136 | else: 137 | sorted_news = today_news 138 | 139 | for item in sorted_news[:max_items]: 140 | read_time = self._calculate_read_time(item['description']) 141 | self.news.append(NewsItem( 142 | title=item['title'], 143 | description=item['description'], 144 | link=item['link'], 145 | read_time=read_time, 146 | source=item['source'], 147 | engagement=item['engagement'], 148 | additional_info=item['additional_info'] 149 | )) 150 | self.summary.append({"title": item['title'], "read_time": read_time}) 151 | return self.summary 152 | 153 | async def get_news(self): 154 | return self.news 155 | -------------------------------------------------------------------------------- /services/event_service.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import feedparser 3 | import requests 4 | from bs4 import BeautifulSoup 5 | from typing import List, Dict 6 | from db_handler import Event, sites 7 | 8 | logger = logging.getLogger(__name__) 9 | logging.basicConfig(level=logging.INFO) 10 | 11 | class EventsService: 12 | def __init__(self, rss_feed_url=sites["events_feed"], html_links=sites["events_url"], top_n=3): 13 | self.rss_feed_url = rss_feed_url 14 | self.html_links = html_links # Fixed variable name from html_link to html_links 15 | self.top_n = top_n 16 | self.events = [] 17 | self.headers = { 18 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36', 19 | 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', 20 | 'Accept-Language': 'en-US,en;q=0.5', 21 | 'Connection': 'keep-alive', 22 | } 23 | 24 | def _get_events_from_rss_feed(self) -> List[Dict]: 25 | try: 26 | feed = feedparser.parse(self.rss_feed_url) 27 | if not feed.entries: 28 | logger.warning(f"No entries found in RSS feed: {self.rss_feed_url}") 29 | return [] 30 | 31 | events = [] 32 | for entry in feed.entries[:self.top_n]: 33 | event = { 34 | "title": entry.get('title', ''), 35 | "description": 
entry.get('description', ''), 36 | "date": entry.get('published', ''), 37 | "location": "", # RSS feed might not have location 38 | "engagement": 0 39 | } 40 | events.append(event) 41 | return events 42 | except Exception as e: 43 | logger.error(f"Error parsing RSS feed: {e}") 44 | return [] 45 | 46 | def _get_events_from_html_link(self) -> List[Dict]: # Fixed method name typo 47 | events = [] 48 | for url in self.html_links: 49 | try: 50 | response = requests.get(url, headers=self.headers, timeout=10) 51 | response.raise_for_status() 52 | soup = BeautifulSoup(response.text, 'html.parser') 53 | 54 | if "conferencealerts" in url: 55 | # Updated selector based on current site structure 56 | events.extend(self._parse_conference_alerts(soup)) 57 | elif "aideadlin.es" in url: 58 | events.extend(self._parse_aideadlines(soup)) 59 | 60 | if len(events) >= self.top_n: 61 | return events[:self.top_n] 62 | except requests.RequestException as e: 63 | logger.error(f"Error fetching {url}: {e}") 64 | continue 65 | except Exception as e: 66 | logger.error(f"Error processing {url}: {e}") 67 | continue 68 | return events 69 | 70 | def _parse_conference_alerts(self, soup: BeautifulSoup) -> List[Dict]: 71 | events = [] 72 | # Updated selectors based on current site structure 73 | items = soup.find_all('div', class_='conference-item') # Changed from 'event-item' 74 | 75 | if not items: 76 | # Fallback to alternative selectors 77 | items = soup.find_all('div', class_='conf-item') 78 | 79 | for item in items: 80 | try: 81 | title_elem = item.find(['h2', 'h3', 'h4']) or item.find(class_='conf-title') 82 | date_elem = item.find(class_=['date', 'conf-date']) 83 | location_elem = item.find(class_=['location', 'conf-location']) 84 | desc_elem = item.find(class_=['description', 'conf-description']) 85 | 86 | if not title_elem: 87 | continue 88 | 89 | event = { 90 | "title": title_elem.text.strip(), 91 | "date": date_elem.text.strip() if date_elem else "", 92 | "location": location_elem.text.strip() if location_elem else "", 93 | "description": desc_elem.text.strip() if desc_elem else "", 94 | "engagement": 0 # Default value if not found 95 | } 96 | events.append(event) 97 | except Exception as e: 98 | logger.error(f"Error parsing conference alert item: {e}") 99 | continue 100 | return events 101 | 102 | def _parse_aideadlines(self, soup: BeautifulSoup) -> List[Dict]: 103 | events = [] 104 | items = soup.select('.conference-item, .deadline-item') 105 | 106 | for item in items: 107 | try: 108 | title_elem = item.find(['h3', 'h4']) or item.select_one('.conf-title') 109 | date_elem = item.select_one('.deadline, .date') 110 | location_elem = item.select_one('.location, .venue') 111 | desc_elem = item.select_one('.description, .abstract') 112 | 113 | if not title_elem: 114 | continue 115 | 116 | event = { 117 | "title": title_elem.text.strip(), 118 | "date": date_elem.text.strip() if date_elem else "", 119 | "location": location_elem.text.strip() if location_elem else "", 120 | "description": desc_elem.text.strip() if desc_elem else "", 121 | "engagement": 0 # Default if not found 122 | } 123 | events.append(event) 124 | except Exception as e: 125 | logger.error(f"Error parsing aideadlines item: {e}") 126 | continue 127 | return events 128 | 129 | async def get_upcoming_events(self): 130 | # Get events from both sources 131 | html_events = self._get_events_from_html_link() 132 | rss_events = self._get_events_from_rss_feed() 133 | 134 | # Combine and deduplicate events 135 | temp_dict = {event["title"]: event for event in 
html_events + rss_events} 136 | temp_list = list(temp_dict.values()) 137 | 138 | # Create Event objects 139 | new_events = [ 140 | Event( 141 | title=event["title"], 142 | date=event["date"], 143 | location=event["location"], 144 | description=event["description"] 145 | ) for event in temp_list[:self.top_n] 146 | ] 147 | 148 | self.events.extend(new_events) 149 | return self.events -------------------------------------------------------------------------------- /services/apps/arx_service.py: -------------------------------------------------------------------------------- 1 | import time 2 | import random 3 | import logging 4 | import urllib.request 5 | import feedparser 6 | import numpy as np 7 | from sklearn import svm 8 | from typing import List, Dict, Any, Optional, Tuple 9 | 10 | 11 | class ArxivScanner: 12 | def __init__(self, base_url: str, top_n: int = 5): 13 | self.base_url = base_url 14 | self.top_n = top_n 15 | self.logger = logging.getLogger(__name__) 16 | self.default_query = 'cat:cs.CV+OR+cat:cs.LG+OR+cat:cs.CL+OR+cat:cs.AI+OR+cat:cs.NE+OR+cat:cs.RO' 17 | 18 | def _get_response(self, search_query: str, start_index: int = 0) -> bytes: 19 | query_url = f'{self.base_url}search_query={search_query}&sortBy=lastUpdatedDate&start={start_index}&max_results=100' 20 | 21 | with urllib.request.urlopen(query_url) as url: 22 | response = url.read() 23 | if url.status != 200: 24 | raise Exception(f"ArXiv API returned status {url.status}") 25 | return response 26 | 27 | def _parse_arxiv_url(self, url: str) -> tuple: 28 | idv = url[url.rfind('/') + 1:] 29 | parts = idv.split('v') 30 | return idv, parts[0], int(parts[1]) 31 | 32 | def _parse_response(self, response: bytes) -> List[Dict[str, Any]]: 33 | def encode_feedparser_dict(d): 34 | if isinstance(d, feedparser.FeedParserDict) or isinstance(d, dict): 35 | return {k: encode_feedparser_dict(d[k]) for k in d.keys()} 36 | elif isinstance(d, list): 37 | return [encode_feedparser_dict(k) for k in d] 38 | return d 39 | 40 | papers = [] 41 | parse = feedparser.parse(response) 42 | 43 | for entry in parse.entries: 44 | paper = encode_feedparser_dict(entry) 45 | idv, raw_id, version = self._parse_arxiv_url(paper['id']) 46 | 47 | paper['_idv'] = idv 48 | paper['_id'] = raw_id 49 | paper['_version'] = version 50 | paper['_time'] = time.mktime(paper['updated_parsed']) 51 | paper['_time_str'] = time.strftime('%b %d %Y', paper['updated_parsed']) 52 | 53 | papers.append(paper) 54 | 55 | return papers 56 | 57 | def rank_papers(self, papers: List[Dict], method: str = 'time', 58 | query: str = None) -> List[Tuple[Dict, float]]: 59 | if not papers: 60 | return [] 61 | 62 | if method == 'time': 63 | scored_papers = [(p, -p['_time']) for p in papers] 64 | 65 | elif method == 'random': 66 | scored_papers = [(p, random.random()) for p in papers] 67 | 68 | elif method == 'search' and query: 69 | query_terms = query.lower().strip().split() 70 | scored_papers = [] 71 | 72 | for p in papers: 73 | score = 0.0 74 | score += 20.0 * sum(1 for term in query_terms if term in p['title'].lower()) 75 | score += 10.0 * sum( 76 | 1 for term in query_terms if term in ' '.join(a['name'].lower() for a in p['authors'])) 77 | score += 5.0 * sum(1 for term in query_terms if term in p['summary'].lower()) 78 | scored_papers.append((p, score)) 79 | 80 | elif method == 'svm': 81 | from sklearn.feature_extraction.text import TfidfVectorizer 82 | 83 | # Prepare text data 84 | texts = [] 85 | times = [] 86 | for p in papers: 87 | try: 88 | title = p['title'] 89 | authors = ' 
'.join(a['name'] for a in p['authors']) 90 | summary = p.get('summary', '') 91 | texts.append(f"{title} {authors} {summary}") 92 | times.append(-p['_time']) # Negative time for more recent = higher score 93 | except Exception as e: 94 | self.logger.error(f"Error processing paper: {e}") 95 | continue 96 | 97 | if not texts: 98 | return [(p, 0.0) for p in papers] 99 | 100 | # Create TF-IDF features 101 | vectorizer = TfidfVectorizer( 102 | max_features=1000, 103 | stop_words='english' 104 | ) 105 | X = vectorizer.fit_transform(texts) 106 | 107 | # Create binary labels based on median time 108 | median_time = np.median(times) 109 | y = np.array([1 if t > median_time else 0 for t in times]) 110 | 111 | # Train SVM 112 | clf = svm.LinearSVC( 113 | class_weight='balanced', 114 | random_state=42, 115 | max_iter=10000 116 | ) 117 | 118 | try: 119 | clf.fit(X, y) 120 | scores = clf.decision_function(X) 121 | scored_papers = [] 122 | for paper, score in zip(papers, scores): 123 | scored_papers.append((paper, float(score))) 124 | except Exception as e: 125 | self.logger.error(f"Error in SVM ranking: {e}") 126 | return [(p, -p['_time']) for p in papers] # Fallback to time-based ranking 127 | 128 | else: 129 | scored_papers = [(p, -p['_time']) for p in papers] 130 | 131 | return sorted(scored_papers, key=lambda x: x[1], reverse=True) 132 | 133 | def get_top_n_papers(self, search_query: Optional[str] = None, 134 | rank_method: str = 'svm') -> List[Dict[str, Any]]: 135 | query = search_query or self.default_query 136 | papers = [] 137 | start_index = 0 138 | 139 | while len(papers) < max(100, self.top_n): # Get more papers for better SVM training 140 | try: 141 | response = self._get_response(query, start_index) 142 | batch = self._parse_response(response) 143 | if not batch: 144 | break 145 | papers.extend(batch) 146 | start_index += len(batch) 147 | time.sleep(1 + random.uniform(0, 3)) 148 | except Exception as e: 149 | self.logger.error(f"Error fetching papers: {e}") 150 | break 151 | ranked_papers = self.rank_papers(papers, method=rank_method, query=search_query) 152 | 153 | return [{ 154 | 'id': p['_id'], 155 | 'title': p['title'], 156 | 'authors': [a['name'] for a in p['authors']], 157 | 'abstract': p['summary'], 158 | 'categories': [t['term'] for t in p['tags']], 159 | '_time_str': p['_time_str'], 160 | 'url': f"https://arxiv.org/abs/{p['_id']}", 161 | 'pdf_url': f"https://arxiv.org/pdf/{p['_id']}.pdf", 162 | 'score': score, 163 | 'publication': "ARXIV" 164 | } for p, score in ranked_papers[:self.top_n]] -------------------------------------------------------------------------------- /db_handler/dynamo.py: -------------------------------------------------------------------------------- 1 | import uuid 2 | import boto3 3 | from utils import utility 4 | from botocore.exceptions import ClientError 5 | from typing import Dict, List, Optional, Any 6 | 7 | 8 | class Dynamo: 9 | def __init__(self, region_name: str): 10 | self.dynamodb = boto3.resource('dynamodb', region_name=region_name) 11 | self.client = boto3.client('dynamodb', region_name=region_name) 12 | 13 | def create_table(self, 14 | table_name: str, 15 | key_schema: List[Dict[str, str]], 16 | attribute_definitions: List[Dict[str, str]], 17 | provisioned_throughput: Optional[Dict[str, int]] = None) -> bool: 18 | try: 19 | if not provisioned_throughput: 20 | provisioned_throughput = { 21 | 'ReadCapacityUnits': 5, 22 | 'WriteCapacityUnits': 5 23 | } 24 | 25 | table = self.dynamodb.create_table( 26 | TableName=table_name, 27 | KeySchema=key_schema, 
28 | AttributeDefinitions=attribute_definitions, 29 | ProvisionedThroughput=provisioned_throughput 30 | ) 31 | table.wait_until_exists() 32 | return True 33 | except ClientError as e: 34 | print(f"Error creating table: {e}") 35 | return False 36 | 37 | def list_tables(self) -> List[str]: 38 | try: 39 | return self.client.list_tables()['TableNames'] 40 | except ClientError as e: 41 | print(f"Error listing tables: {e}") 42 | return [] 43 | 44 | def describe_table(self, table_name: str) -> Dict: 45 | try: 46 | return self.client.describe_table(TableName=table_name) 47 | except ClientError as e: 48 | print(f"Error describing table: {e}") 49 | return {} 50 | 51 | def table_exists(self, table_name: str) -> bool: 52 | try: 53 | self.client.describe_table(TableName=table_name) 54 | return True 55 | except ClientError: 56 | return False 57 | 58 | def delete_table(self, table_name: str) -> bool: 59 | try: 60 | table = self.dynamodb.Table(table_name) 61 | table.delete() 62 | table.wait_until_not_exists() 63 | return True 64 | except ClientError as e: 65 | print(f"Error deleting table: {e}") 66 | return False 67 | 68 | def add_item(self, table_name: str, partition_key: str, item: Dict[str, Any], auto_id: bool = True) -> str: 69 | try: 70 | table = self.dynamodb.Table(table_name) 71 | if auto_id and 'id' not in item: 72 | item[partition_key] = str(uuid.uuid4()) 73 | 74 | item['created_at'] = utility.get_formatted_timestamp() 75 | table.put_item(Item=item) 76 | return item.get('id', '') 77 | except ClientError as e: 78 | print(f"Error adding item: {e}") 79 | return "" 80 | 81 | def get_item(self, table_name: str, key: Dict[str, Any]) -> Dict: 82 | try: 83 | table = self.dynamodb.Table(table_name) 84 | response = table.get_item(Key=key) 85 | return response.get('Item', {}) 86 | except ClientError as e: 87 | print(f"Error getting item: {e}") 88 | return {} 89 | 90 | def update_item(self, table_name: str, key: Dict[str, Any], update_attrs: Dict[str, Any]) -> bool: 91 | try: 92 | table = self.dynamodb.Table(table_name) 93 | 94 | update_expr_parts = [] 95 | expr_attr_values = {} 96 | expr_attr_names = {} 97 | 98 | for attr_name, value in update_attrs.items(): 99 | attr_parts = attr_name.split('.') 100 | update_name = '#' + '_'.join(attr_parts) 101 | expr_attr_names[update_name] = attr_parts[-1] 102 | 103 | value_key = ':' + '_'.join(attr_parts) 104 | update_expr_parts.append(f"{update_name} = {value_key}") 105 | expr_attr_values[value_key] = value 106 | 107 | update_expr_parts.append('#updated_at = :updated_at') 108 | expr_attr_names['#updated_at'] = 'updated_at' 109 | expr_attr_values[':updated_at'] = utility.get_formatted_timestamp() 110 | 111 | update_expression = 'SET ' + ', '.join(update_expr_parts) 112 | 113 | table.update_item( 114 | Key=key, 115 | UpdateExpression=update_expression, 116 | ExpressionAttributeValues=expr_attr_values, 117 | ExpressionAttributeNames=expr_attr_names 118 | ) 119 | return True 120 | except ClientError as e: 121 | print(f"Error updating item: {e}") 122 | return False 123 | 124 | def delete_item(self, table_name: str, key: Dict[str, Any]) -> bool: 125 | try: 126 | table = self.dynamodb.Table(table_name) 127 | table.delete_item(Key=key) 128 | return True 129 | except ClientError as e: 130 | print(f"Error deleting item: {e}") 131 | return False 132 | 133 | def query_items(self, 134 | table_name: str, 135 | key_condition_expression: str, 136 | expression_values: Dict[str, Any], 137 | index_name: Optional[str] = None, 138 | filter_expression: Optional[str] = None, 139 | limit: 
Optional[int] = None) -> List[Dict]: 140 | """ 141 | Query items from the table 142 | 143 | Args: 144 | table_name: Name of the table 145 | key_condition_expression: KeyConditionExpression for the query 146 | expression_values: Dictionary of expression values 147 | index_name: Optional secondary index name 148 | filter_expression: Optional filter expression 149 | limit: Optional limit for results 150 | """ 151 | try: 152 | table = self.dynamodb.Table(table_name) 153 | params = { 154 | 'KeyConditionExpression': key_condition_expression, 155 | 'ExpressionAttributeValues': expression_values 156 | } 157 | 158 | if index_name: 159 | params['IndexName'] = index_name 160 | if filter_expression: 161 | params['FilterExpression'] = filter_expression 162 | if limit: 163 | params['Limit'] = limit 164 | 165 | response = table.query(**params) 166 | return response.get('Items', []) 167 | except ClientError as e: 168 | print(f"Error querying items: {e}") 169 | return [] 170 | 171 | def scan_items(self, 172 | table_name: str, 173 | filter_expression: Optional[str] = None, 174 | expression_values: Optional[Dict[str, Any]] = None, 175 | limit: Optional[int] = None) -> List[Dict]: 176 | """ 177 | Scan items from the table 178 | 179 | Args: 180 | table_name: Name of the table 181 | filter_expression: Optional filter expression 182 | expression_values: Optional dictionary of expression values 183 | limit: Optional limit for results 184 | """ 185 | try: 186 | table = self.dynamodb.Table(table_name) 187 | params = {} 188 | 189 | if filter_expression: 190 | params['FilterExpression'] = filter_expression 191 | if expression_values: 192 | params['ExpressionAttributeValues'] = expression_values 193 | if limit: 194 | params['Limit'] = limit 195 | 196 | response = table.scan(**params) 197 | return response.get('Items', []) 198 | except ClientError as e: 199 | print(f"Error scanning items: {e}") 200 | return [] -------------------------------------------------------------------------------- /db_handler/sample_vault/links.py: -------------------------------------------------------------------------------- 1 | rss_feed = ["https://machinelearningmastery.com/blog/feed/", 2 | "https://bair.berkeley.edu/blog/feed.xml", 3 | "http://news.mit.edu/rss/topic/artificial-intelligence2", 4 | "https://deepmind.com/blog/feed/basic/", 5 | "https://www.unite.ai/feed/", 6 | "https://ai2people.com/feed/", 7 | "https://hanhdbrown.com/feed/", 8 | "https://dailyai.com/feed/", 9 | "https://nyheter.aitool.se/feed/", 10 | "https://www.spritle.com/blog/feed/", 11 | "https://yatter.in/feed/", 12 | "https://www.shaip.com/feed/", 13 | "https://www.greataiprompts.com/feed/", 14 | "https://zerothprinciples.substack.com/feed", 15 | "https://airevolution.blog/feed/", 16 | "https://saal.ai/feed/", 17 | "https://aicorr.com/feed/", 18 | "https://qudata.com/en/news/rss.xml", 19 | "https://hanhdbrown.com/category/ai/feed/", 20 | "https://www.oreilly.com/radar/topics/ai-ml/feed/index.xml", 21 | "https://blogs.sas.com/content/topic/artificial-intelligence/feed/", 22 | "https://blogs.rstudio.com/ai/index.xml", 23 | "https://www.technologyreview.com/topic/artificial-intelligence/feed", 24 | "http://www.kdnuggets.com/feed", 25 | "https://research.aimultiple.com/feed/", 26 | "https://nanonets.com/blog/rss/", 27 | "https://www.datarobot.com/blog/feed/", 28 | "https://becominghuman.ai/feed", 29 | "https://bigdataanalyticsnews.com/category/artificial-intelligence/feed/", 30 | "https://blog.kore.ai/rss.xml", 31 | "https://www.clarifai.com/blog/rss.xml", 32 | 
"https://expertsystem.com/feed/", 33 | "https://theaisummer.com/feed.xml", 34 | "https://www.aiiottalk.com/feed/", 35 | "https://www.isentia.com/feed/", 36 | "https://chatbotslife.com/feed", 37 | "http://www.marketingaiinstitute.com/blog/rss.xml", 38 | "https://www.topbots.com/feed/", 39 | "https://www.artificiallawyer.com/feed/", 40 | "https://dlabs.ai/feed/", 41 | "https://www.aitimejournal.com/feed/", 42 | "https://insights.fusemachines.com/feed/", 43 | "https://intelligence.org/blog/feed/", 44 | "https://deepcognition.ai/feed/", 45 | "https://1reddrop.com/feed/", 46 | "https://www.viact.ai/blog-feed.xml", 47 | "https://robotwritersai.com/feed/", 48 | "https://aihub.org/feed/?cat=-473", 49 | "https://usmsystems.com/blog/feed/", 50 | "https://www.aiplusinfo.com/feed/", 51 | "https://metadevo.com/feed/", 52 | "https://www.cogitotech.com/feed/", 53 | "https://datamachina.substack.com/feed", 54 | "https://vue.ai/blog/feed/", 55 | "https://www.greatlearning.in/blog/category/artificial-intelligence/feed/", 56 | "https://topmarketingai.com/feed/", 57 | "https://appzoon.com/feed/", 58 | "https://medium.com/feed/@securechainai", 59 | "https://blogs.microsoft.com/ai/feed/", 60 | "https://chatbotsmagazine.com/feed", 61 | "https://findnewai.com/feed/", 62 | "http://kavita-ganesan.com/feed", 63 | "https://pandio.com/feed/", 64 | "https://www.danrose.ai/blog?format=rss", 65 | "https://www.edia.nl/edia-blog?format=rss", 66 | "http://www.eledia.org/e-air/feed/", 67 | "http://ankit-ai.blogspot.com/feeds/posts/default?alt=rss", 68 | "https://editorialia.com/feed/", 69 | "http://blog.datumbox.com/feed/", 70 | "https://daleonai.com/feed.xml", 71 | "https://binaryinformatics.com/category/ai/feed/", 72 | "https://www.kochartech.com/feed/", 73 | "https://medium.com/feed/@Francesco_AI", 74 | "https://medium.com/feed/archieai", 75 | "https://medium.com/feed/ai-roadmap-institute", 76 | "https://docs.microsoft.com/en-us/archive/blogs/machinelearning/feed.xml", 77 | "https://www.404media.co/rss", 78 | "https://magazine.sebastianraschka.com/feed", 79 | "https://aiacceleratorinstitute.com/rss/", 80 | "https://ai-techpark.com/category/ai/feed/", 81 | "https://knowtechie.com/category/ai/feed/", 82 | "https://aimodels.substack.com/feed", 83 | "https://www.artificialintelligence-news.com/feed/rss/", 84 | "https://venturebeat.com/category/ai/feed/", 85 | "https://ainowinstitute.org/category/news/feed", 86 | "https://siliconangle.com/category/ai/feed", 87 | "https://aisnakeoil.substack.com/feed", 88 | "https://www.anaconda.com/blog/feed", 89 | "https://analyticsindiamag.com/feed/", 90 | "https://feeds.arstechnica.com/arstechnica/index", 91 | "https://www.theguardian.com/technology/artificialintelligenceai/rss", 92 | "https://spacenews.com/tag/artificial-intelligence/feed/", 93 | "https://futurism.com/categories/ai-artificial-intelligence/feed", 94 | "https://www.wired.com/feed/tag/ai/latest/rss", 95 | "https://www.techrepublic.com/rssfeeds/topic/artificial-intelligence/", 96 | "https://medium.com/feed/artificialis", 97 | "https://siliconangle.com/category/big-data/feed", 98 | "https://davidstutz.de/category/blog/feed", 99 | "https://neptune.ai/blog/feed", 100 | "https://blog.eleuther.ai/index.xml", 101 | "https://pyimagesearch.com/blog/feed", 102 | "https://feeds.bloomberg.com/technology/news.rss", 103 | "https://www.wired.com/feed/category/business/latest/rss", 104 | "https://every.to/chain-of-thought/feed.xml", 105 | "https://huyenchip.com/feed", 106 | "https://news.crunchbase.com/feed", 107 | 
"https://arxiv.org/rss/cs.CL", 108 | "https://arxiv.org/rss/cs.CV", 109 | "https://arxiv.org/rss/cs.LG", 110 | "https://dagshub.com/blog/rss/", 111 | "https://www.databricks.com/feed", 112 | "https://datafloq.com/feed/?post_type=post", 113 | "https://www.datanami.com/feed/", 114 | "https://debuggercafe.com/feed/", 115 | "https://deephaven.io/blog/rss.xml", 116 | "https://tech.eu/category/deep-tech/feed", 117 | "https://departmentofproduct.substack.com/feed", 118 | "https://www.eetimes.com/feed", 119 | "https://www.engadget.com/rss.xml", 120 | "https://eugeneyan.com/rss/", 121 | "https://explosion.ai/feed", 122 | "https://www.freethink.com/feed/all", 123 | "https://www.generational.pub/feed", 124 | "https://www.forrester.com/blogs/category/artificial-intelligence-ai/feed", 125 | "https://www.ghacks.net/feed/", 126 | "https://gizmodo.com/rss", 127 | "https://globalnews.ca/tag/artificial-intelligence/feed", 128 | "https://gradientflow.com/feed/", 129 | "https://hackernoon.com/tagged/ai/feed", 130 | "https://feeds.feedburner.com/HealthTechMagazine", 131 | "https://huggingface.co/blog/feed.xml", 132 | "https://spectrum.ieee.org/feeds/topic/artificial-intelligence.rss", 133 | "https://feed.infoq.com/ai-ml-data-eng/", 134 | "https://insidebigdata.com/feed", 135 | "https://www.interconnects.ai/feed", 136 | "https://www.ibtimes.com/rss", 137 | "https://www.jmlr.org/jmlr.xml", 138 | "https://www.kdnuggets.com/feed", 139 | "https://blog.langchain.dev/rss/", 140 | "https://lastweekin.ai/feed", 141 | "https://www.latent.space/feed", 142 | "https://www.zdnet.com/topic/artificial-intelligence/rss.xml", 143 | "https://lightning.ai/pages/feed/", 144 | "https://blog.ml.cmu.edu/feed", 145 | "https://www.nature.com/subjects/machine-learning.rss", 146 | "https://www.marktechpost.com/feed", 147 | "https://www.microsoft.com/en-us/research/feed/", 148 | "https://news.mit.edu/topic/mitmachine-learning-rss.xml", 149 | "https://www.technologyreview.com/feed/", 150 | "https://www.newscientist.com/subject/technology/feed/", 151 | "https://phys.org/rss-feed/technology-news/machine-learning-ai/", 152 | "https://techxplore.com/rss-feed/machine-learning-ai-news/", 153 | "https://www.assemblyai.com/blog/rss/", 154 | "https://nicholas.carlini.com/writing/feed.xml", 155 | "https://rss.nytimes.com/services/xml/rss/nyt/Technology.xml", 156 | "https://www.oneusefulthing.org/feed", 157 | "https://blog.paperspace.com/rss/", 158 | "https://petapixel.com/feed", 159 | "https://erichartford.com/rss.xml", 160 | "https://minimaxir.com/post/index.xml", 161 | "https://api.quantamagazine.org/feed", 162 | "https://medium.com/feed/radix-ai-blog", 163 | "https://feeds.feedburner.com/RBloggers", 164 | "https://replicate.com/blog/rss", 165 | "https://notes.replicatecodex.com/rss/", 166 | "https://restofworld.org/feed/latest", 167 | "https://tech.eu/category/robotics/feed", 168 | "http://rss.sciam.com/ScientificAmerican-Global", 169 | "https://www.semianalysis.com/feed", 170 | "https://www.siliconrepublic.com/feed", 171 | "https://stackoverflow.blog/feed/", 172 | "https://arxiv.org/rss/stat.ML", 173 | "https://medium.com/feed/@netflixtechblog", 174 | "https://medium.com/feed/@odsc", 175 | "https://syncedreview.com/feed", 176 | "https://synthedia.substack.com/feed", 177 | "https://techcrunch.com/feed/", 178 | "https://www.techmeme.com/feed.xml", 179 | "https://techmonitor.ai/feed", 180 | "https://www.reutersagency.com/feed/?best-topics=tech", 181 | "https://www.techspot.com/backend.xml", 182 | "https://bdtechtalks.com/feed/", 183 | 
"https://thealgorithmicbridge.substack.com/feed", 184 | "https://the-decoder.com/feed/", 185 | "https://thegradient.pub/rss/", 186 | "https://www.theintrinsicperspective.com/feed/", 187 | "https://thenewstack.io/feed", 188 | "https://thenextweb.com/neural/feed", 189 | "https://rss.beehiiv.com/feeds/2R3C6Bt5wj.xml", 190 | "https://thesequence.substack.com/feed", 191 | "https://www.thestack.technology/latest/rss/", 192 | "https://blog.tensorflow.org/feeds/posts/default?alt=rss", 193 | "https://www.thetradenews.com/feed/", 194 | "http://feeds.libsyn.com/102459/rss", 195 | "https://pub.towardsai.net/feed", 196 | "https://towardsdatascience.com/feed", 197 | "https://unwindai.substack.com/feed", 198 | "https://visualstudiomagazine.com/rss-feeds/news.aspx", 199 | "https://voicebot.ai/feed/", 200 | "https://wandb.ai/fully-connected/rss.xml", 201 | "https://blogs.windows.com/feed", 202 | "https://blog.wolfram.com/feed/", 203 | "https://aihub.org/feed?cat=-473", 204 | # "https://topenddevs.com/podcasts/adventures-in-machine-learning/rss.rss", 205 | "https://aiandbanking.libsyn.com/rss", 206 | "https://feeds.blubrry.com/feeds/aitoday.xml", 207 | "https://feeds.acast.com/public/shows/e421d786-ec36-4148-aa99-7a3b2928a779", 208 | "https://datascienceathome.com/feed.xml", 209 | "https://dataskeptic.libsyn.com/rss", 210 | "https://datastori.es/feed/", 211 | # "https://anchor.fm/s/41286f68/podcast/rss", 212 | "https://aneyeonai.libsyn.com/rss", 213 | # "https://geomob-podcast.castos.com/feed", 214 | # "https://anchor.fm/s/443868ac/podcast/rss", 215 | "https://feeds.captivate.fm/gradient-dissent/", 216 | "https://feed.podbean.com/hdsr/feed.xml", 217 | # "https://anchor.fm/s/174cb1b8/podcast/rss", 218 | "http://feeds.soundcloud.com/users/soundcloud:users:306749289/sounds.rss", 219 | "http://nssdeviations.com/rss", 220 | "https://feeds.transistor.fm/postgres-fm", 221 | "https://changelog.com/practicalai/feed", 222 | "http://lexisnexisbis.libsyn.com/rss", 223 | "https://talkpython.fm/episodes/rss", 224 | "https://feeds.libsyn.com/468519/rss", 225 | # "http://podcast.emerj.com/rss", 226 | "http://feeds.soundcloud.com/users/soundcloud:users:264034133/sounds.rss", 227 | # "https://anchor.fm/s/3952c6f8/podcast/rss", 228 | "https://feeds.transistor.fm/the-data-engineering-show", 229 | "https://thedataexchange.media/feed/", 230 | # "https://api.substack.com/feed/podcast/265424/s/1354.rss", 231 | "https://feeds.megaphone.fm/marketingai", 232 | "https://twimlai.com/feed", 233 | "https://feeds.transistor.fm/this-day-in-ai", 234 | # "https://anchor.fm/s/32ec7408/podcast/rss", 235 | ] 236 | 237 | sites = { 238 | "gh_url": "https://api.github.com", 239 | "ph_url": "https://api.producthunt.com/v2/api/graphql", 240 | "ph_site_url": "https://www.producthunt.com", 241 | "hf_base_url": "https://huggingface.co", 242 | "hf_papers_url": "https://huggingface.co/papers?date=2024-12-20", 243 | "hf_board_url": "https://huggingface.co/collections/open-llm-leaderboard", 244 | "gh_daily_url": "https://github.com/trending/python?since=daily&spoken_language_code=en", 245 | "gh_weekly_url": "https://github.com/trending/python?since=weekly&spoken_language_code=en", 246 | "arxiv_url": "http://export.arxiv.org/api/query?", 247 | "events_url": ["https://conferencealerts.co.in/artificial-intelligence", 248 | "https://aideadlin.es/?sub=ML,CV,CG,NLP,RO,SP,DM,AP,KR,HCI"], 249 | "events_feed":"https://aiml.events/feed/rss/" 250 | } -------------------------------------------------------------------------------- /router/routes.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | import csv 3 | from app.main import * 4 | from db_handler import TaskType, SchedulerState 5 | from utils.auth_utility import create_token, token_required 6 | from utils.utility import is_valid_email, is_email_subscribed, save_to_csv 7 | 8 | from flask_cors import CORS 9 | from flask_limiter import Limiter 10 | from flask import Blueprint, jsonify, request 11 | from flask_limiter.util import get_remote_address 12 | 13 | 14 | bp = Blueprint("ailert", __name__, url_prefix="/internal/v1") 15 | 16 | limiter = Limiter( 17 | key_func=get_remote_address, 18 | default_limits=["10 per day", "2 per hour"] 19 | ) 20 | 21 | CORS(bp, resources={ 22 | r"/api/*": { 23 | "origins": ["https://ailert.tech"], 24 | "methods": ["GET", "POST", "PUT", "DELETE"], 25 | "allow_headers": ["Content-Type", "Authorization"] 26 | } 27 | }) 28 | 29 | config = configparser.ConfigParser() 30 | config.read('db_handler/vault/secrets.ini') 31 | user_id = config["JWT"]["user_id"] 32 | 33 | 34 | @bp.route('/login', methods=['POST']) 35 | def login(): 36 | token = create_token(user_id) 37 | return jsonify({ 38 | "status": "success", 39 | "token": token 40 | }) 41 | 42 | 43 | @bp.route('/start-scheduler/', methods=['POST']) 44 | @limiter.limit("5 per hour") 45 | @token_required 46 | def start_scheduler(task_type): 47 | if task_type not in [t.value for t in TaskType]: 48 | return jsonify({ 49 | "status": "error", 50 | "message": "Invalid task type. Use 'daily' or 'weekly'" 51 | }), 400 52 | 53 | if scheduler_state["is_running"]: 54 | return jsonify({ 55 | "status": "error", 56 | "message": "Scheduler is already running" 57 | }), 400 58 | 59 | stop_event.clear() 60 | scheduler_state["is_running"] = True 61 | scheduler_state["is_paused"] = False 62 | scheduler_state["task_type"] = task_type 63 | 64 | scheduler_thread.start() 65 | 66 | return jsonify({ 67 | "status": "success", 68 | "message": f"{task_type} scheduler started successfully", 69 | "state": SchedulerState.RUNNING.value 70 | }) 71 | 72 | 73 | @bp.route('/manage-scheduler/', methods=['POST']) 74 | @limiter.limit("5 per hour") 75 | @token_required 76 | def manage_scheduler(action): 77 | if not scheduler_state["is_running"]: 78 | return jsonify({ 79 | "status": "error", 80 | "message": "No scheduler is currently running" 81 | }), 400 82 | 83 | if action == "pause": 84 | if scheduler_state["is_paused"]: 85 | return jsonify({ 86 | "status": "error", 87 | "message": "Scheduler is already paused" 88 | }), 400 89 | scheduler_state["is_paused"] = True 90 | state = SchedulerState.PAUSED.value 91 | message = "Scheduler paused successfully" 92 | 93 | elif action == "resume": 94 | if not scheduler_state["is_paused"]: 95 | return jsonify({ 96 | "status": "error", 97 | "message": "Scheduler is not paused" 98 | }), 400 99 | scheduler_state["is_paused"] = False 100 | state = SchedulerState.RUNNING.value 101 | message = "Scheduler resumed successfully" 102 | 103 | elif action == "stop": 104 | stop_event.set() 105 | if scheduler_thread: 106 | scheduler_thread.join() 107 | schedule.clear() 108 | scheduler_state["task_type"] = None 109 | state = SchedulerState.STOPPED.value 110 | message = "Scheduler stopped successfully" 111 | 112 | else: 113 | return jsonify({ 114 | "status": "error", 115 | "message": "Invalid action. 
Use 'pause', 'resume', or 'stop'" 116 | }), 400 117 | 118 | return jsonify({ 119 | "status": "success", 120 | "message": message, 121 | "state": state, 122 | "task_type": scheduler_state["task_type"] 123 | }) 124 | 125 | 126 | @bp.route('/scheduler-status', methods=['GET']) 127 | @limiter.limit("5 per hour") 128 | @token_required 129 | def get_scheduler_status(): 130 | if not scheduler_state["is_running"]: 131 | state = SchedulerState.STOPPED.value 132 | elif scheduler_state["is_paused"]: 133 | state = SchedulerState.PAUSED.value 134 | else: 135 | state = SchedulerState.RUNNING.value 136 | 137 | return jsonify({ 138 | "is_running": scheduler_state["is_running"], 139 | "state": state, 140 | "task_type": scheduler_state["task_type"] 141 | }) 142 | 143 | 144 | @bp.route('/generate-newsletter', methods=['POST']) 145 | @limiter.limit("5 per hour") 146 | @token_required 147 | async def api_generate_newsletter(): 148 | try: 149 | data = request.get_json() 150 | 151 | if not data: 152 | return jsonify({ 153 | "status": "error", 154 | "message": "No data provided", 155 | "timestamp": utility.get_formatted_timestamp() 156 | }), 400 157 | 158 | sections = data.get('sections') 159 | task_type = data.get('task_type') 160 | 161 | if not sections or not task_type: 162 | return jsonify({ 163 | "status": "error", 164 | "message": "Missing required fields: sections or task_type", 165 | "timestamp": utility.get_formatted_timestamp() 166 | }), 400 167 | 168 | if task_type not in [TaskType.WEEKLY.value, TaskType.DAILY.value]: 169 | return jsonify({ 170 | "status": "error", 171 | "message": f"Invalid task_type. Must be either 'weekly' or 'daily'", 172 | "timestamp": utility.get_formatted_timestamp() 173 | }), 400 174 | 175 | newsletter_html = await generate_newsletter(sections, task_type) 176 | 177 | return jsonify({ 178 | "status": "success", 179 | "message": "Newsletter generated successfully", 180 | "content": newsletter_html, 181 | "timestamp": utility.get_formatted_timestamp() 182 | }) 183 | 184 | except Exception as e: 185 | logging.error(f"Error generating newsletter: {str(e)}") 186 | return jsonify({ 187 | "status": "error", 188 | "message": f"Error generating newsletter: {str(e)}", 189 | "timestamp": utility.get_formatted_timestamp() 190 | }), 500 191 | 192 | 193 | @bp.route('/save-newsletter', methods=['POST']) 194 | @limiter.limit("5 per hour") 195 | @token_required 196 | def api_save_newsletter(): 197 | try: 198 | data = request.get_json() 199 | 200 | if not data: 201 | return jsonify({ 202 | "status": "error", 203 | "message": "No data provided", 204 | "timestamp": utility.get_formatted_timestamp() 205 | }), 400 206 | 207 | content = data.get('content') 208 | content_type = data.get('content_type') 209 | 210 | if not content or not content_type: 211 | return jsonify({ 212 | "status": "error", 213 | "message": "Missing required fields: content or content_type", 214 | "timestamp": utility.get_formatted_timestamp() 215 | }), 400 216 | 217 | if content_type not in ['weekly', 'daily']: 218 | return jsonify({ 219 | "status": "error", 220 | "message": "Invalid content_type. 
Must be either 'weekly' or 'daily'", 221 | "timestamp": utility.get_formatted_timestamp() 222 | }), 400 223 | 224 | saved_item = save_to_db(content, content_type) 225 | 226 | return jsonify({ 227 | "status": "success", 228 | "message": "Newsletter saved successfully", 229 | "newsletterId": saved_item["newsletterId"], 230 | "timestamp": utility.get_formatted_timestamp() 231 | }) 232 | 233 | except Exception as e: 234 | logging.error(f"Error saving newsletter: {str(e)}") 235 | return jsonify({ 236 | "status": "error", 237 | "message": f"Error saving newsletter: {str(e)}", 238 | "timestamp": utility.get_formatted_timestamp() 239 | }), 500 240 | 241 | 242 | @bp.route('/send-email', methods=['POST']) 243 | @limiter.limit("5 per hour") 244 | @token_required 245 | async def api_send_email(): 246 | try: 247 | data = request.get_json() 248 | 249 | if not data: 250 | return jsonify({ 251 | "status": "error", 252 | "message": "No data provided", 253 | "timestamp": utility.get_formatted_timestamp() 254 | }), 400 255 | 256 | recipients = data.get('recipients', []) 257 | content = data.get('content') 258 | template_id = data.get('template_id') 259 | 260 | if not content: 261 | return jsonify({ 262 | "status": "error", 263 | "message": "Missing required field: content", 264 | "timestamp": utility.get_formatted_timestamp() 265 | }), 400 266 | 267 | result = await send_email(recipients, content, template_id) 268 | 269 | return jsonify({ 270 | **result, # Include all fields from the EmailService response 271 | "timestamp": utility.get_formatted_timestamp() 272 | }) 273 | 274 | except Exception as e: 275 | logging.error(f"Error sending email: {str(e)}") 276 | return jsonify({ 277 | "status": "error", 278 | "message": f"Error sending email: {str(e)}", 279 | "timestamp": utility.get_formatted_timestamp() 280 | }), 500 281 | 282 | 283 | @bp.route('/generate-and-send', methods=['POST']) 284 | @limiter.limit("5 per hour") 285 | @token_required 286 | async def api_generate_and_send(): 287 | try: 288 | data = request.get_json() 289 | 290 | if not data: 291 | return jsonify({ 292 | "status": "error", 293 | "message": "No data provided", 294 | "timestamp": utility.get_formatted_timestamp() 295 | }), 400 296 | 297 | sections = data.get('sections') 298 | task_type = data.get('task_type') 299 | recipients = data.get('recipients', []) 300 | 301 | if not sections or not task_type: 302 | return jsonify({ 303 | "status": "error", 304 | "message": "Missing required fields: sections or task_type", 305 | "timestamp": utility.get_formatted_timestamp() 306 | }), 400 307 | 308 | # Generate newsletter 309 | newsletter_html = await generate_newsletter(sections, task_type) 310 | 311 | # Save to database 312 | saved_item = save_to_db(newsletter_html, task_type) 313 | 314 | # Send email 315 | email_result = await send_email(recipients, saved_item["content"], saved_item["newsletterId"]) 316 | 317 | return jsonify({ 318 | "status": "success", 319 | "message": "Newsletter generated and sent successfully", 320 | "newsletterId": saved_item["newsletterId"], 321 | "email_status": email_result, 322 | "timestamp": utility.get_formatted_timestamp() 323 | }) 324 | 325 | except Exception as e: 326 | logging.error(f"Error in generate and send workflow: {str(e)}") 327 | return jsonify({ 328 | "status": "error", 329 | "message": f"Error in generate and send workflow: {str(e)}", 330 | "timestamp": utility.get_formatted_timestamp() 331 | }), 500 332 | 333 | 334 | @bp.route('/subscribe', methods=['POST']) 335 | def subscribe(): 336 | try: 337 | data = 
request.get_json() 338 | 339 | if not data or 'email' not in data: 340 | return jsonify({ 341 | "status": "error", 342 | "message": "Email is required", 343 | "timestamp": utility.get_formatted_timestamp() 344 | }), 400 345 | 346 | email = data['email'].lower().strip() 347 | 348 | if not is_valid_email(email): 349 | return jsonify({ 350 | "status": "error", 351 | "message": "Invalid email format", 352 | "timestamp": utility.get_formatted_timestamp() 353 | }), 400 354 | 355 | if is_email_subscribed(email): 356 | return jsonify({ 357 | "status": "error", 358 | "message": "Email already subscribed", 359 | "timestamp": utility.get_formatted_timestamp() 360 | }), 400 361 | 362 | if save_to_csv(email): 363 | return jsonify({ 364 | "status": "success", 365 | "message": "Successfully subscribed", 366 | "timestamp": utility.get_formatted_timestamp() 367 | }), 201 368 | else: 369 | return jsonify({ 370 | "status": "error", 371 | "message": "Failed to save subscription", 372 | "timestamp": utility.get_formatted_timestamp() 373 | }), 500 374 | 375 | except Exception as e: 376 | logging.error(f"Error in subscribe endpoint: {str(e)}") 377 | return jsonify({ 378 | "status": "error", 379 | "message": "Internal server error", 380 | "timestamp": utility.get_formatted_timestamp() 381 | }), 500 382 | 383 | 384 | @bp.route('/unsubscribe', methods=['POST']) 385 | def unsubscribe(): 386 | try: 387 | data = request.get_json() 388 | 389 | if not data or 'email' not in data: 390 | return jsonify({ 391 | "status": "error", 392 | "message": "Email is required", 393 | "timestamp": utility.get_formatted_timestamp() 394 | }), 400 395 | 396 | email = data['email'].lower().strip() 397 | csv_file = 'db_handler/vault/subscribers.csv' 398 | 399 | if not os.path.exists(csv_file): 400 | return jsonify({ 401 | "status": "error", 402 | "message": "Email not found", 403 | "timestamp": utility.get_formatted_timestamp() 404 | }), 404 405 | 406 | temp_rows = [] 407 | found = False 408 | 409 | with open(csv_file, 'r') as file: 410 | reader = csv.reader(file) 411 | temp_rows.append(next(reader)) # Keep header 412 | for row in reader: 413 | if row[0] != email: 414 | temp_rows.append(row) 415 | else: 416 | found = True 417 | 418 | if not found: 419 | return jsonify({ 420 | "status": "error", 421 | "message": "Email not found", 422 | "timestamp": utility.get_formatted_timestamp() 423 | }), 404 424 | 425 | with open(csv_file, 'w', newline='') as file: 426 | writer = csv.writer(file) 427 | writer.writerows(temp_rows) 428 | 429 | return jsonify({ 430 | "status": "success", 431 | "message": "Successfully unsubscribed", 432 | "timestamp": utility.get_formatted_timestamp() 433 | }) 434 | 435 | except Exception as e: 436 | logging.error(f"Error in unsubscribe endpoint: {str(e)}") 437 | return jsonify({ 438 | "status": "error", 439 | "message": "Internal server error", 440 | "timestamp": utility.get_formatted_timestamp() 441 | }), 500 442 | 443 | --------------------------------------------------------------------------------
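For reference, here is a minimal client sketch (not shipped with the repository) showing how the routes defined above might be exercised against a locally running instance. It assumes the app is started via `launch.py` on the default port 5000 and that `token_required` in `utils/auth_utility.py` accepts a Bearer token in the `Authorization` header; adjust the header scheme to whatever the auth utility actually expects.

```python
# Illustrative client for the /internal/v1 API defined in router/routes.py.
# The Bearer-token header format is an assumption; check utils/auth_utility.py.
import requests

BASE_URL = "http://localhost:5000/internal/v1"

# /login issues a token for the user id configured in db_handler/vault/secrets.ini
token = requests.post(f"{BASE_URL}/login").json()["token"]

# Generate a daily newsletter with a "news" section (see api_generate_newsletter)
response = requests.post(
    f"{BASE_URL}/generate-newsletter",
    headers={"Authorization": f"Bearer {token}"},  # assumed auth scheme
    json={"sections": ["news"], "task_type": "daily"},
)
print(response.status_code, response.json().get("status"))
```

Note that most routes are rate limited (for example 5 requests per hour on the scheduler and newsletter endpoints), so repeated calls may return HTTP 429.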