├── db_handler
│   ├── sample_vault
│   │   ├── recipients.csv
│   │   ├── secrets.ini
│   │   └── links.py
│   ├── __init__.py
│   ├── models.py
│   ├── db.py
│   └── dynamo.py
├── .gitignore
├── services
│   ├── crawler
│   │   ├── blog_crawler.py
│   │   ├── social_media_crawler.py
│   │   ├── __init__.py
│   │   └── rss_crawler.py
│   ├── apps
│   │   ├── or_service.py
│   │   ├── __init__.py
│   │   ├── kg_service.py
│   │   ├── hf_service.py
│   │   ├── gh_service.py
│   │   ├── ph_service.py
│   │   └── arx_service.py
│   ├── __init__.py
│   ├── competition_service.py
│   ├── product_service.py
│   ├── research_service.py
│   ├── email_service.py
│   ├── news_service.py
│   └── event_service.py
├── .dockerignore
├── launch.py
├── requirements.txt
├── static
│   ├── logo.svg
│   ├── favicon.svg
│   ├── newsletter.html
│   └── style.css
├── LICENSE
├── Dockerfile
├── utils
│   ├── auth_utility.py
│   └── utility.py
├── .github
│   └── workflows
│       └── deploy.yml
├── app
│   └── main.py
├── README.md
├── CODE_OF_CONDUCT.md
└── router
    └── routes.py
/db_handler/sample_vault/recipients.csv:
--------------------------------------------------------------------------------
1 | email
2 | add-your-email@test.com
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /test.py
2 | /static/generated_newsletter.html
3 | /db_handler/vault/
4 | /sync-vault*.sh
5 |
--------------------------------------------------------------------------------
/services/crawler/blog_crawler.py:
--------------------------------------------------------------------------------
1 | class SubstackCrawler:
2 | pass
3 |
4 | class MediumCrawler:
5 | pass
--------------------------------------------------------------------------------
/services/crawler/social_media_crawler.py:
--------------------------------------------------------------------------------
1 | class TwitterCrawler:
2 | pass
3 |
4 | class LinkedinCrawler:
5 | pass
--------------------------------------------------------------------------------
/db_handler/__init__.py:
--------------------------------------------------------------------------------
1 | from db_handler.db import *
2 | from db_handler.models import *
3 | from db_handler.dynamo import Dynamo
4 | from db_handler.vault.links import rss_feed, sites
--------------------------------------------------------------------------------
/services/apps/or_service.py:
--------------------------------------------------------------------------------
1 | class OpenReviewScanner:
2 | def __init__(self, top_n: int = 5):
3 | self.top_n = top_n
4 |
5 | def get_top_n_papers(self):
6 | pass
--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
1 | __pycache__
2 | *.pyc
3 | *.pyo
4 | *.pyd
5 | .Python
6 | env/
7 | venv/
8 | .env
9 | *.log
10 | .git
11 | .gitignore
12 | .pytest_cache/
13 | .coverage
14 | htmlcov/
15 | .DS_Store
16 | test.py
17 | *.sqlite
--------------------------------------------------------------------------------
/launch.py:
--------------------------------------------------------------------------------
1 | from flask import Flask
2 | from router.routes import bp, limiter
3 | import os
4 |
5 | app = Flask(__name__)
6 |
7 | limiter.init_app(app)
8 | app.register_blueprint(bp)
9 |
10 |
11 | if __name__ == "__main__":
12 | port = int(os.environ.get("PORT", 5000))
13 |     app.run(host="0.0.0.0", port=port, debug=os.environ.get("FLASK_ENV", "development") == "development")
14 |
--------------------------------------------------------------------------------
/services/crawler/__init__.py:
--------------------------------------------------------------------------------
1 | from services.crawler.rss_crawler import *
2 | from services.crawler.blog_crawler import SubstackCrawler, MediumCrawler
3 | from services.crawler.social_media_crawler import LinkedinCrawler, TwitterCrawler
4 |
5 | __all__ = [
6 | "SubstackCrawler",
7 | "MediumCrawler",
8 | "LinkedinCrawler",
9 | "TwitterCrawler"
10 | ]
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | substack-api==1.1.1
2 | kaggle==1.7.4.5
3 | pydantic==2.11.9
4 | pandas
5 | requests==2.32.5
6 | simplejson==3.20.2
7 | botocore==1.40.44
8 | schedule==1.2.2
9 | PyJWT
10 | cryptography==46.0.2
11 | beautifulsoup4==4.14.2
12 | numpy
13 | scikit-learn
14 | feedparser==6.0.12
15 | pytz==2025.2
16 | Flask
17 | Flask-Cors==6.0.1
18 | Flask-Limiter
19 | uvicorn
20 | sqlitedict==2.1.0
21 | sendgrid==7.0.0rc2
22 | boto3==1.40.44
23 |
--------------------------------------------------------------------------------
/services/apps/__init__.py:
--------------------------------------------------------------------------------
1 | from services.apps.arx_service import ArxivScanner
2 | from services.apps.gh_service import GitHubScanner
3 | from services.apps.hf_service import HuggingFaceScanner
4 | from services.apps.kg_service import KaggleScanner
5 | from services.apps.or_service import OpenReviewScanner
6 | from services.apps.ph_service import ProductHuntScanner
7 |
8 | __all__ = [
9 | "ArxivScanner",
10 | "GitHubScanner",
11 | "HuggingFaceScanner",
12 | "KaggleScanner",
13 | "OpenReviewScanner",
14 | "ProductHuntScanner"
15 | ]
--------------------------------------------------------------------------------
/db_handler/sample_vault/secrets.ini:
--------------------------------------------------------------------------------
1 | [default]
2 | brand_name = "AiLert"
3 |
4 | [HuggingFace]
5 | # token = add huggingface token and uncomment this line
6 |
7 | [Kaggle]
8 | # path = add kaggle credential file path here and uncomment
9 |
10 |
11 | [Dynamo]
12 | # region = us-east-1
13 |
14 | [Arxiv]
15 | # q = cat:cs.CV+OR+cat:cs.LG+OR+cat:cs.CL+OR+cat:cs.AI+OR+cat:cs.NE+OR+cat:cs.RO
16 |
17 | [Sendgrid]
18 | # api_key = add sendgrid api key and uncomment
19 |
20 | [JWT]
21 | # user_id = test
22 | # token = generate a random token that your apis will accept
--------------------------------------------------------------------------------
/static/logo.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/services/__init__.py:
--------------------------------------------------------------------------------
1 | from services.news_service import NewsService
2 | from services.event_service import EventsService
3 | from services.research_service import ResearchService
4 | from services.apps.gh_service import GitHubScanner
5 | from services.competition_service import CompetitionService
6 | from services.product_service import ProductService
7 | from services.email_service import EmailService
8 |
9 |
10 | __all__ = [
11 | "NewsService",
12 | "GitHubScanner",
13 | "CompetitionService",
14 | "EventsService",
15 | "ResearchService",
16 | "ProductService",
17 | "EmailService"
18 | ]
--------------------------------------------------------------------------------
/services/competition_service.py:
--------------------------------------------------------------------------------
1 | from db_handler import Competitions
2 | from services.apps import KaggleScanner
3 |
4 | class CompetitionService:
5 | def __init__(self):
6 | self.kaggle = KaggleScanner()
7 | self.competitions = []
8 |
9 | async def get_latest_competitions(self):
10 | kaggle = self.kaggle.get_new_competitions_launch()
11 | self.competitions.extend([Competitions(
12 | name = comp["name"],
13 | link = comp["link"],
14 | deadline = comp["deadline"],
15 | reward = comp["reward"]
16 | ) for comp in kaggle])
17 |
18 | return self.competitions
--------------------------------------------------------------------------------
/services/product_service.py:
--------------------------------------------------------------------------------
1 | from db_handler import Products, sites
2 | from services.apps import HuggingFaceScanner, ProductHuntScanner
3 |
4 | class ProductService:
5 | def __init__(self):
6 | self.hf_scanner = HuggingFaceScanner(sites["hf_base_url"],1)
7 | self.ph_scanner = ProductHuntScanner(sites["ph_site_url"], sites["ph_url"],1)
8 | self.products = []
9 |
10 | async def get_latest_products(self):
11 | hf_products = self.hf_scanner.weekly_scanner()
12 | ph_products = None #self.ph_scanner.get_last_week_top_products()
13 | final_dict = hf_products #+ ph_products
14 | for key, items in final_dict.items():
15 | for item in items:
16 | self.products.append(Products(
17 | name = item["title"],
18 | link = item["link"],
19 | summary = item["summary"],
20 | source = item["source"],
21 | engagement = item["engagement"]
22 | ))
23 | return self.products
24 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2025 Anuj Gupta
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | # Use Python 3.13 slim image as base
2 | FROM python:3.13-slim
3 |
4 | # Set working directory
5 | WORKDIR /app
6 |
7 | # Set environment variables
8 | ENV PYTHONDONTWRITEBYTECODE=1 \
9 | PYTHONUNBUFFERED=1 \
10 | FLASK_APP=launch.py \
11 | FLASK_ENV=production
12 |
13 | # Install system dependencies
14 | RUN apt-get update && apt-get install -y --no-install-recommends \
15 | build-essential \
16 | libpq-dev \
17 | && rm -rf /var/lib/apt/lists/*
18 |
19 | # Create non-root user for security
20 | RUN adduser --disabled-password --gecos '' appuser
21 |
22 | # Copy requirements first to leverage Docker cache
23 | COPY requirements.txt .
24 |
25 | # Install Python dependencies
26 | RUN pip install --no-cache-dir -r requirements.txt
27 |
28 | # Copy the rest of the application
29 | COPY . .
30 |
31 | # Create vault directory
32 | RUN mkdir -p /app/db_handler/vault && \
33 | chown -R appuser:appuser /app
34 |
35 | # Switch to non-root user
36 | USER appuser
37 |
38 | # Create volume for vault
39 | VOLUME ["/app/db_handler/vault"]
40 |
41 | # Expose port
42 | EXPOSE 5000
43 |
44 | # Command to run the application
45 | CMD ["python", "launch.py"]
--------------------------------------------------------------------------------
/static/favicon.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/utils/auth_utility.py:
--------------------------------------------------------------------------------
1 | import logging
2 |
3 | import jwt
4 | import configparser
5 | from functools import wraps
6 | from flask import request, jsonify
7 | from datetime import datetime, timedelta, timezone
8 |
9 | config = configparser.ConfigParser()
10 | config.read('db_handler/vault/secrets.ini')
11 | JWT_SECRET_KEY = config["JWT"]["token"]
12 |
13 |
14 | def create_token(user_id):
15 | payload = {
16 |         'exp': datetime.now(timezone.utc) + timedelta(days=1),  # PyJWT treats naive datetimes as UTC, so be explicit
17 | 'sub': user_id
18 | }
19 |
20 | token = jwt.encode(
21 | payload,
22 | JWT_SECRET_KEY,
23 | algorithm='HS256'
24 | )
25 | return token
26 |
27 |
28 | def token_required(f):
29 | @wraps(f)
30 | def decorated(*args, **kwargs):
31 | token = None
32 |
33 | # Check for token in headers
34 | if 'Authorization' in request.headers:
35 |             token = request.headers['Authorization'].split(" ")[-1]  # [-1] tolerates a bare token without the "Bearer " prefix
36 |
37 | if not token:
38 | return jsonify({
39 | 'message': 'Token is missing',
40 | 'status': 'error'
41 | }), 401
42 |
43 | try:
44 | # Decode token
45 | data = jwt.decode(token, JWT_SECRET_KEY, algorithms=["HS256"])
46 | current_user = data['sub']
47 | except Exception as e:
48 | logging.info("Token error" + str(e))
49 | return jsonify({
50 | 'message': 'Token is invalid',
51 | 'status': 'error'
52 | }), 401
53 |
54 | return f(*args, **kwargs)
55 |
56 | return decorated
57 |
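58 | # Example (sketch): protecting a Flask route with token_required. The blueprint
59 | # and route names below are illustrative, not part of this module.
60 | #
61 | #     from flask import Blueprint
62 | #     bp = Blueprint("api", __name__)
63 | #
64 | #     @bp.route("/newsletter")
65 | #     @token_required
66 | #     def get_newsletter():
67 | #         return {"status": "ok"}
68 | #
69 | # Clients then send:  Authorization: Bearer <token from create_token(user_id)>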
--------------------------------------------------------------------------------
/db_handler/models.py:
--------------------------------------------------------------------------------
1 | from enum import Enum
2 | from pydantic import BaseModel
3 | from typing import List, Optional
4 |
5 | class TaskType(Enum):
6 | DAILY = "daily"
7 | WEEKLY = "weekly"
8 |
9 | class SchedulerState(Enum):
10 | RUNNING = "running"
11 | PAUSED = "paused"
12 | STOPPED = "stopped"
13 |
14 | class NewsItem(BaseModel):
15 | title: str
16 | description: str
17 | link: str
18 | read_time: int
19 | source: Optional[str] = None
20 | engagement: Optional[str] = None
21 | additional_info: Optional[dict] = None
22 |
23 | class Competitions(BaseModel):
24 | name: str
25 | link: str
26 | deadline: str
27 | reward: str
28 |
29 | class Repo(BaseModel):
30 | name: str
31 | link: str
32 | summary: str
33 | source: Optional[str] = None
34 | engagement: Optional[str] = None
35 |
36 | class Products(BaseModel):
37 | name: str
38 | link: str
39 | summary: str
40 | source: Optional[str] = None
41 | engagement: Optional[str] = None
42 |
43 | class Event(BaseModel):
44 | title: str
45 | date: str
46 | location: str
47 | description: str
48 |
49 | class ResearchPaper(BaseModel):
50 | title: str
51 | authors: List[str]
52 | abstract: str
53 | publication: str
54 | link: str
55 | date: str
56 | engagement: Optional[str] = None
57 |
58 | class NewsletterContent(BaseModel):
59 | # model_config = dict(arbitrary_types_allowed=True)
60 | highlights: List[dict] | None = None
61 | breaking_news: List[NewsItem] | None = None
62 | research_papers: List[ResearchPaper] | None = None
63 | latest_competitions: List[Competitions] | None = None
64 | top_products: List[Products] | None = None
65 | github_trending: List[Repo] | None = None
66 | upcoming_events: List[Event] | None = None
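67 | 
68 | # Example (sketch): these are plain pydantic models, validated on construction, e.g.
69 | #     NewsItem(title="t", description="d", link="https://example.com", read_time=3)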
--------------------------------------------------------------------------------
/services/apps/kg_service.py:
--------------------------------------------------------------------------------
1 | import configparser
2 | import os
3 | import subprocess
4 |
5 | config = configparser.ConfigParser()
6 | config.read('db_handler/vault/secrets.ini')
7 |
8 | default_cred = config["Kaggle"]["path"]
9 |
10 | class KaggleScanner:
11 | def __init__(self, base_url: str = "", top_n=5, kaggle_cred_path=default_cred):
12 | self.base_url = base_url
13 | self.top_n = top_n
14 | self.kaggle_cred_path = kaggle_cred_path
15 | self.response = []
16 |
17 | def _get_top_n_kaggle_competitions(self):
18 | try:
19 | os.environ["KAGGLE_CONFIG_DIR"] = os.path.expanduser(self.kaggle_cred_path)
20 | result = subprocess.run(
21 | ["kaggle", "competitions", "list", "--sort-by", "prize"],
22 |                 capture_output=True,  # capture stderr too; result.stderr was None before
23 |                 text=True
24 | )
25 | if result.returncode != 0:
26 | print("Error fetching Kaggle competitions:", result.stderr)
27 |                 return []  # empty list keeps callers working instead of handing them None
28 |
29 | lines = result.stdout.strip().split("\n")
30 | data_rows = [line for line in lines if "https://www.kaggle.com" in line]
31 | response = []
32 |
33 | for row in data_rows[:self.top_n]:
34 | columns = row.split()
35 | if len(columns) > 0:
36 | competition_link = columns[0]
37 | deadline = columns[1]
38 | reward = columns[4]
39 |
40 | competition_name = competition_link.split("/")[-1]
41 |
42 | response.append({
43 | "name": competition_name,
44 | "link": competition_link,
45 | "deadline": deadline,
46 | "reward": reward
47 | })
48 | return response
49 | except Exception as e:
50 | print(f"Error: {e}")
51 |
52 | def get_new_competitions_launch(self):
53 | self.response = self._get_top_n_kaggle_competitions()
54 | return self.response
55 |
--------------------------------------------------------------------------------
/services/apps/hf_service.py:
--------------------------------------------------------------------------------
1 | import configparser
2 |
3 | import requests
4 |
5 | config = configparser.ConfigParser()
6 | config.read('db_handler/vault/secrets.ini')
7 |
8 | default_token = config["HuggingFace"]["token"]
9 |
10 | class HuggingFaceScanner:
11 | def __init__(self, base_url, top_n=5, auth_token=default_token):
12 | self.base_url = base_url
13 | self.top_n = top_n
14 | self.auth_token = "Bearer "+auth_token
15 | self.response = {}
16 |
17 | def _top_models(self, top_n):
18 | url = self.base_url+"/api/models"
19 | response = requests.get(
20 | url, params={"limit": top_n, "full": "True", "config": "False"},
21 | headers={"Authorization":self.auth_token}
22 | )
23 | return [{"title":model["modelId"],
24 | "link":self.base_url+model["id"],
25 | "summary": model["author"],
26 | "source":"HuggingFace",
27 | "engagement": str(model["trendingScore"])}for model in response.json()]
28 |
29 | def _top_datasets(self, top_n):
30 | url = self.base_url+"/api/datasets"
31 | response = requests.get(
32 | url, params={"limit": top_n, "full": "False"},
33 | headers={"Authorization":self.auth_token}
34 | )
35 | return [{"title": dataset["id"],
36 | "link": self.base_url + dataset["id"],
37 | "summary": dataset["author"],
38 | "source": "HuggingFace",
39 | "engagement": str(dataset["trendingScore"])} for dataset in response.json()]
40 |
41 | def _top_apps(self, top_n):
42 | url = self.base_url+"/api/spaces"
43 | response = requests.get(
44 | url, params={"limit": top_n, "full": "True"},
45 | headers={"Authorization":self.auth_token}
46 | )
47 | return [{"title": apps["id"],
48 | "link": self.base_url + apps["id"],
49 | "summary": apps["author"],
50 | "source": "HuggingFace",
51 | "engagement": str(apps["trendingScore"])} for apps in response.json()]
52 |
53 | def weekly_scanner(self):
54 | self.response["top_models"] = self._top_models(self.top_n)
55 | self.response["top_datasets"] = self._top_datasets(self.top_n)
56 | self.response["top_apps"] = self._top_apps(self.top_n)
57 | return self.response
--------------------------------------------------------------------------------
/services/research_service.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import configparser
3 | from sklearn import svm
4 | from db_handler import sites
5 | from typing import List, Dict
6 |
7 | from db_handler import ResearchPaper
8 | from services.apps import ArxivScanner
9 | from services.apps import OpenReviewScanner
10 | from sklearn.feature_extraction.text import TfidfVectorizer
11 |
12 |
13 | config = configparser.ConfigParser()
14 | config.read('db_handler/vault/secrets.ini')
15 |
16 | class ResearchService:
17 | def __init__(self, top_n:int = 3):
18 | self.top_n = top_n
19 |         self.arxiv = ArxivScanner(sites["arxiv_url"], top_n=top_n)
20 | self.open_review = OpenReviewScanner(top_n=top_n)
21 | self.top_papers = []
22 |
23 | def _rerank(self, arxiv_papers: List[Dict], open_papers: List[Dict]) -> List[Dict]:
24 | all_papers = arxiv_papers + open_papers
25 | texts = [f"{p['title']} {p['abstract']} {' '.join(p['authors'])}" for p in all_papers]
26 |
27 | vectorizer = TfidfVectorizer(
28 | max_features=5000,
29 | stop_words='english',
30 | ngram_range=(1, 2)
31 | )
32 | x = vectorizer.fit_transform(texts)
33 | y = np.zeros(len(all_papers))
34 | for i, paper in enumerate(all_papers):
35 | score = float(paper.get('score', 0))
36 | citations = float(paper.get('citations', 0))
37 | y[i] = score + 0.1 * citations
38 |
39 | if y.max() > y.min():
40 | y = (y - y.min()) / (y.max() - y.min())
41 |
42 | clf = svm.LinearSVC(
43 | class_weight='balanced',
44 | max_iter=1000,
45 | dual=False
46 | )
47 | clf.fit(x, y > np.median(y))
48 | scores = clf.decision_function(x)
49 | scored_papers = [(paper, score) for paper, score in zip(all_papers, scores)]
50 | reranked = sorted(scored_papers, key=lambda x: x[1], reverse=True)
51 | return [paper for paper, _ in reranked[:self.top_n]]
52 |
53 | async def get_latest_papers(self):
54 | search_query = config["Arxiv"]["q"]
55 | arxiv_papers = self.arxiv.get_top_n_papers(search_query=search_query)
56 | open_r_papers = self.open_review.get_top_n_papers()
57 | reranked_papers = self._rerank(arxiv_papers, open_r_papers)
58 | self.top_papers.extend(ResearchPaper(
59 | title = paper["title"],
60 | abstract= paper["abstract"],
61 | authors = paper["authors"],
62 | publication = paper["publication"],
63 | date = paper["_time_str"],
64 | link = paper["url"],
65 | engagement = "") for paper in reranked_papers)
66 | return self.top_papers
67 |
--------------------------------------------------------------------------------
/.github/workflows/deploy.yml:
--------------------------------------------------------------------------------
1 | name: Deploy to EC2
2 |
3 | on:
4 | pull_request:
5 | types: [closed]
6 | branches: [ main ]
7 | workflow_dispatch:
8 |
9 | env:
10 | DOCKER_IMAGE_TAG: ${{ github.sha }}
11 |
12 | jobs:
13 | deploy:
14 | runs-on: ubuntu-latest
15 |     if: github.event_name == 'workflow_dispatch' || github.event.pull_request.merged == true
16 |
17 | steps:
18 | - name: Checkout Repository
19 | uses: actions/checkout@v4
20 |
21 | - name: Configure AWS Credentials
22 | uses: aws-actions/configure-aws-credentials@v4
23 | with:
24 | aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
25 | aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
26 | aws-region: ${{ secrets.AWS_REGION }}
27 |
28 | - name: Setup SSH and Deploy
29 | run: |
30 | # Setup SSH
31 | mkdir -p ~/.ssh
32 | echo "${{ secrets.EC2_SSH_KEY }}" > ~/.ssh/id_rsa
33 | chmod 600 ~/.ssh/id_rsa
34 | ssh-keyscan -H ${{ secrets.EC2_HOST }} >> ~/.ssh/known_hosts
35 |
36 | # Deploy to EC2
37 | ssh ${{ secrets.EC2_USER }}@${{ secrets.EC2_HOST }} "
38 | # Export AWS credentials
39 | export AWS_ACCESS_KEY_ID='${{ secrets.AWS_ACCESS_KEY_ID }}'
40 | export AWS_SECRET_ACCESS_KEY='${{ secrets.AWS_SECRET_ACCESS_KEY }}'
41 | export AWS_REGION='${{ secrets.AWS_REGION }}'
42 |
43 | # Create and setup directories
44 | sudo mkdir -p /data/newsletter/vault
45 | sudo chown -R ${{ secrets.EC2_USER }}:${{ secrets.EC2_USER }} /data/newsletter
46 |
47 | # Sync S3
48 | aws s3 sync s3://${{ secrets.S3_CONFIG_BUCKET }}/vault/ /data/newsletter/vault/
49 |
50 | # Deploy with Docker
51 | sudo docker build -t ailert-newsletter:${{ env.DOCKER_IMAGE_TAG }} https://github.com/${{ github.repository }}.git#${{ github.ref }}
52 |
53 | sudo docker stop ailert-newsletter || true
54 | sudo docker rm ailert-newsletter || true
55 |
56 | sudo docker run -d \
57 | --name ailert-newsletter \
58 | -p 5000:5000 \
59 | -v /data/newsletter/vault:/app/db_handler/vault \
60 | --restart unless-stopped \
61 | -e AWS_ACCESS_KEY_ID='${{ secrets.AWS_ACCESS_KEY_ID }}' \
62 | -e AWS_SECRET_ACCESS_KEY='${{ secrets.AWS_SECRET_ACCESS_KEY }}' \
63 | -e AWS_REGION='${{ secrets.AWS_REGION }}' \
64 | -e SMTP_USERNAME='${{ secrets.SMTP_USERNAME }}' \
65 | -e SMTP_PASSWORD='${{ secrets.SMTP_PASSWORD }}' \
66 | -e JWT_SECRET='${{ secrets.JWT_SECRET }}' \
67 | ailert-newsletter:${{ env.DOCKER_IMAGE_TAG }}
68 |
69 | sudo docker system prune -f --volumes
70 | "
71 |
72 | - name: Cleanup
73 | if: always()
74 | run: rm -f ~/.ssh/id_rsa
--------------------------------------------------------------------------------
/static/newsletter.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | AiLert Weekly Newsletter
7 |
59 |
60 |
61 |
62 |
63 |
64 |
68 |
69 |
79 |
80 | {{content}}
81 |
82 |
91 |
92 |
93 |
--------------------------------------------------------------------------------
/services/apps/gh_service.py:
--------------------------------------------------------------------------------
1 | import jwt
2 | import time
3 | import requests
4 | import configparser
5 | from db_handler import Repo
6 | from bs4 import BeautifulSoup
7 |
8 | config = configparser.ConfigParser()
9 | config.read('db_handler/vault/secrets.ini')
10 |
11 | default_pem = config["GitHub"]["pem_path"]
12 | default_clientId = config["GitHub"]["client_id"]
13 |
14 | class GitHubScanner:
15 | def __init__(self, site_url, ftype, top_n=5, pem_path=default_pem, client_id=default_clientId):
16 | self.site_url = site_url
17 | self.ftype = ftype
18 | self.top_n = top_n
19 | self.pem_path = pem_path
20 | self.client_id = client_id
21 | self.response = []
22 |
23 | def _gh_authenticate(self):
24 | with open(self.pem_path, 'rb') as pem_file:
25 | signing_key = pem_file.read()
26 |
27 | payload = {
28 | 'iat': int(time.time()),
29 | 'exp': int(time.time()) + 600,
30 | 'iss': self.client_id
31 | }
32 |
33 | encoded_jwt = jwt.encode(payload, signing_key, algorithm='RS256')
34 | return encoded_jwt
35 |
36 | def _extract_from_html(self, link):
37 | repos = []
38 | try:
39 | response = requests.get(link)
40 | response.raise_for_status()
41 | soup = BeautifulSoup(response.text, 'html.parser')
42 | repo_list = soup.find_all('article', class_='Box-row')
43 |
44 | for repo in repo_list:
45 | name = repo.find('h2', class_='h3').text.strip().replace('\n', '').replace(' ', '')
46 |
47 | description = repo.find('p', class_='col-9 color-fg-muted my-1 pr-4')
48 | description = description.text.strip() if description else "No description provided."
49 |
50 | stars_element = repo.find('a', class_='Link Link--muted d-inline-block mr-3') or \
51 | repo.find('a', class_='Link--muted d-inline-block mr-3')
52 | stars = stars_element.text.strip().replace(',', '') if stars_element else "0"
53 |
54 | fork_elements = repo.find_all('a', class_='Link Link--muted d-inline-block mr-3') or \
55 | repo.find_all('a', class_='Link--muted d-inline-block mr-3')
56 | forks = fork_elements[1].text.strip().replace(',', '') if len(fork_elements) > 1 else "0"
57 |
58 | repos.append({
59 | 'name': name,
60 | 'description': description,
61 | 'stars': str(stars),
62 | 'forks': str(forks)
63 | })
64 |
65 | return repos[:self.top_n]
66 |         except Exception as e:
67 |             print(f"Error: {str(e)}")
68 |             return []  # keep callers iterating over a list instead of crashing on None
69 | def _daily_trending_repos(self):
70 | repositories = self._extract_from_html(self.site_url)
71 | return repositories
72 |
73 | def _weekly_trending_repos(self):
74 | repositories = self._extract_from_html(self.site_url)
75 | return repositories
76 |
77 | async def get_trending_repos(self):
78 | if self.ftype == "daily":
79 | repositories = self._daily_trending_repos()
80 | else:
81 | repositories = self._weekly_trending_repos()
82 | self.response.extend(Repo(
83 | name = repo["name"],
84 | link = "",
85 | summary = repo["description"],
86 | source = "GitHub",
87 | engagement = repo["stars"]) for repo in repositories)
88 | return self.response
--------------------------------------------------------------------------------
/services/apps/ph_service.py:
--------------------------------------------------------------------------------
1 | import requests
2 | from bs4 import BeautifulSoup
3 | from datetime import datetime, timedelta
4 |
5 | class ProductHuntScanner:
6 | def __init__(self, site_url, graph_url, top_n=5):
7 | self.site_url = site_url
8 | self.graph_url = graph_url
9 | self.top_n = top_n
10 | self.response = []
11 |
12 | def get_last_week_top_products(self):
13 | headers = {
14 | "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
15 | }
16 | try:
17 | response = requests.get(self.site_url, headers=headers)
18 | response.raise_for_status()
19 | soup = BeautifulSoup(response.text, "html.parser")
20 | last_week_section = soup.find("section", string="Last Week's Top Products")
21 | if not last_week_section:
22 | print("Could not find 'Last Week's Top Products' section.")
23 | return []
24 |
25 | products = []
26 | for product in last_week_section.find_all("li"):
27 | title = product.find("h3").get_text(strip=True) if product.find("h3") else "No Title"
28 | link = product.find("a", href=True)["href"] if product.find("a", href=True) else "No Link"
29 | products.append({"title": title, "link": f"{self.site_url}{link}"})
30 |
31 | return products
32 | except Exception as e:
33 | print(f"Error fetching data: {e}")
34 | return []
35 |
36 | def get_last_month_top_products(self, api_key):
37 | query = """
38 | query ($dateFrom: DateTime!, $dateTo: DateTime!) {
39 | posts(first: 10, postedAfter: $dateFrom, postedBefore: $dateTo, order: VOTES_COUNT) {
40 | edges {
41 | node {
42 | id
43 | name
44 | tagline
45 | url
46 | votesCount
47 | }
48 | }
49 | }
50 | }
51 | """
52 | today = datetime.utcnow()
53 | first_day_of_this_month = datetime(today.year, today.month, 1)
54 | last_day_of_last_month = first_day_of_this_month - timedelta(days=1)
55 | first_day_of_last_month = datetime(last_day_of_last_month.year, last_day_of_last_month.month, 1)
56 |
57 | variables = {
58 | "dateFrom": first_day_of_last_month.isoformat(),
59 | "dateTo": last_day_of_last_month.isoformat()
60 | }
61 |
62 | # Set headers with API key
63 | headers = {
64 | "Authorization": f"Bearer {api_key}",
65 | "Content-Type": "application/json"
66 | }
67 |
68 | try:
69 | response = requests.post(self.graph_url, json={"query": query, "variables": variables}, headers=headers)
70 | response.raise_for_status()
71 | data = response.json()
72 |
73 | products = data.get("data", {}).get("posts", {}).get("edges", [])
74 | if not products:
75 | print("No products found for last month.")
76 | return []
77 |
78 | result = []
79 | for product in products:
80 | node = product["node"]
81 | result.append({
82 | "title": node["name"],
83 | "summary": node["tagline"],
84 | "link": node["url"],
85 | "engagement": node["votesCount"],
86 | "source": "Product Hunt"
87 | })
88 |
89 | return result
90 | except Exception as e:
91 | print(f"Error fetching data: {e}")
92 | return []
93 |
--------------------------------------------------------------------------------
/services/crawler/rss_crawler.py:
--------------------------------------------------------------------------------
1 | import pytz
2 | import html
3 | import feedparser
4 | from datetime import datetime
5 |
6 | import requests
7 | import xml.etree.ElementTree as et
8 | from urllib.parse import urlparse
9 |
10 |
11 | def is_rss_feed(url):
12 | try:
13 | parsed_url = urlparse(url)
14 | if not all([parsed_url.scheme, parsed_url.netloc]):
15 | return False
16 |
17 | headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'}
18 | response = requests.get(url, headers=headers, timeout=10)
19 | response.raise_for_status()
20 |
21 | content_type = response.headers.get('content-type', '').lower()
22 | if not any(valid_type in content_type for valid_type in ['application/rss+xml', 'application/xml', 'text/xml']):
23 | return False
24 |
25 | root = et.fromstring(response.content)
26 | rss_indicators = [
27 | 'rss',
28 | 'feed',
29 | 'channel',
30 | 'item',
31 | 'entry'
32 | ]
33 |
34 | if root.tag in rss_indicators:
35 | return True
36 |
37 | for child in root:
38 | if child.tag in rss_indicators:
39 | return True
40 | return False
41 | except requests.RequestException:
42 | return False
43 | except et.ParseError:
44 | return False
45 | except Exception:
46 | return False
47 |
48 | class RSSCrawler:
49 |     """Feed reader built on feedparser. (The class wrapper and its name are
50 |     assumed: the original methods took `self` but had no enclosing class.)"""
51 | 
52 |     def __init__(self, feed_url=None):
53 |         self.feed_url = feed_url
54 |         self.feed_data = None
55 | 
56 |     def load_feed(self, url):
57 |         self.feed_url = url
58 |         try:
59 |             self.feed_data = feedparser.parse(url)
60 |             return len(self.feed_data.entries) > 0
61 |         except Exception as e:
62 |             print(f"Error loading feed: {e}")
63 |             return False
64 | 
65 |     def get_feed_info(self):
66 |         if not self.feed_data:
67 |             return None
68 | 
69 |         return {
70 |             'title': self.feed_data.feed.get('title', 'No title'),
71 |             'description': self.feed_data.feed.get('description', 'No description'),
72 |             'link': self.feed_data.feed.get('link', ''),
73 |             'last_updated': self.feed_data.feed.get('updated', 'No update date')
74 |         }
75 | 
76 |     def get_entries(self, limit=None, sort_by_date=True):
77 |         if not self.feed_data:
78 |             return []
79 | 
80 |         entries = []
81 |         for entry in self.feed_data.entries:
82 |             clean_entry = {
83 |                 'title': html.unescape(entry.get('title', 'No title')),
84 |                 'link': entry.get('link', ''),
85 |                 'description': html.unescape(entry.get('description', 'No description')),
86 |                 'author': entry.get('author', 'Unknown author'),
87 |                 'published': entry.get('published', 'No publication date'),
88 |                 'updated': entry.get('updated', entry.get('published', 'No update date'))
89 |             }
90 |             try:
91 |                 date = entry.get('updated_parsed', entry.get('published_parsed'))
92 |                 # always set the key, so the sort below can't hit a KeyError
93 |                 clean_entry['timestamp'] = datetime(*date[:6], tzinfo=pytz.UTC) if date else None
94 |             except (TypeError, ValueError):
95 |                 clean_entry['timestamp'] = None
96 | 
97 |             entries.append(clean_entry)
98 |         if sort_by_date:
99 |             entries.sort(key=lambda x: x['timestamp'] if x['timestamp'] else datetime.min.replace(tzinfo=pytz.UTC),
100 |                          reverse=True)
101 |         if limit:
102 |             entries = entries[:limit]
103 | 
104 |         return entries
105 | 
106 |     def search_entries(self, keyword, case_sensitive=False):
107 |         if not self.feed_data:
108 |             return []
109 | 
110 |         matches = []
111 |         entries = self.get_entries()
112 | 
113 |         for entry in entries:
114 |             search_text = f"{entry['title']} {entry['description']}"
115 |             if not case_sensitive:
116 |                 search_text = search_text.lower()
117 |                 keyword = keyword.lower()
118 | 
119 |             if keyword in search_text:
120 |                 matches.append(entry)
121 | 
122 |         return matches
123 | 
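124 | # Example (sketch, illustrative -- nothing else in the repo imports this class by name):
125 | #
126 | #     crawler = RSSCrawler()
127 | #     if crawler.load_feed("https://example.com/feed.xml"):
128 | #         for entry in crawler.get_entries(limit=5):
129 | #             print(entry['title'], entry['link'])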
--------------------------------------------------------------------------------
/app/main.py:
--------------------------------------------------------------------------------
1 | import asyncio, time
2 | import logging
3 | import schedule
4 | import configparser
5 | import pandas as pd
6 | from utils import utility
7 | from typing import Optional
8 | from services import EmailService
9 | from threading import Thread, Event
10 | from db_handler import sites, Dynamo, TaskType
11 | from builder.builder import NewsletterBuilder
12 |
13 | logger = logging.getLogger(__name__)
14 | logging.basicConfig(level=logging.INFO)
15 |
16 | stop_event = Event()
17 | scheduler_thread: Optional[Thread] = None
18 | scheduler_state = {"is_running": False, "is_paused": False, "task_type": None}
19 |
20 | config = configparser.ConfigParser()
21 | config.read('db_handler/vault/secrets.ini')
22 | region = config["Dynamo"]["region"]
23 |
24 | dynamo = Dynamo(region)
25 |
26 | df = pd.read_csv("db_handler/vault/recipients.csv")
27 | subscribers = df['email'].tolist()
28 |
29 | def run_scheduler(task_type: str):
30 | if task_type == TaskType.WEEKLY.value:
31 |         schedule.every().monday.at("00:00").do(lambda: asyncio.run(weekly_task()))  # schedule can't await coroutines
32 | logging.info("Weekly scheduler started")
33 | else:
34 |         schedule.every().day.at("00:00").do(lambda: asyncio.run(daily_task()))  # schedule can't await coroutines
35 | logging.info("Daily scheduler started")
36 |
37 | while not stop_event.is_set():
38 | if not scheduler_state["is_paused"]:
39 | schedule.run_pending()
40 | time.sleep(1)
41 |
42 | schedule.clear()
43 | scheduler_state["is_running"] = False
44 | logging.info("Scheduler stopped")
45 |
46 |
47 | async def generate_newsletter(sections, task_type):
48 | if task_type == TaskType.WEEKLY.value:
49 | urls = sites["gh_weekly_url"]
50 | else:
51 | urls = sites["gh_daily_url"]
52 |
53 | weekly = NewsletterBuilder({
54 | "gh_url": urls,
55 | "gh_ftype": task_type},
56 | dynamo)
57 | weekly.set_sections(sections)
58 | content = await weekly.section_generator()
59 | newsletter_html = await weekly.build(content)
60 | return newsletter_html
61 |
62 |
63 | async def daily_task():
64 | daily = NewsletterBuilder({
65 | "gh_url": sites["gh_daily_url"],
66 | "gh_ftype": "daily"},
67 | dynamo)
68 | daily.set_sections(["news"])
69 | logger.info(f"starting generator")
70 | content = await daily.section_generator()
71 | logger.info(f"sections generated")
72 | newsletter_html = await daily.build(content)
73 | newsletter_html = utility.inline_css(newsletter_html, "static")
74 | newsletter_html = utility.inline_svg_images(newsletter_html, "static")
75 | logger.info("content updated")
76 | item = save_to_db(newsletter_html, "daily")
77 | logger.info(f"saved to db, sending email")
78 | await send_email(content=item["content"])
79 | logger.info(f"email sent")
80 |
81 |
82 | async def weekly_task():
83 | weekly = NewsletterBuilder({
84 | "gh_url": sites["gh_weekly_url"],
85 | "gh_ftype": "weekly"},
86 | dynamo)
87 | weekly.set_sections(["all"])
88 | logger.info(f"starting generator")
89 | content = await weekly.section_generator()
90 | logger.info(f"sections generated")
91 | newsletter_html = await weekly.build(content)
92 | logger.info(f"newsletter build complete")
93 | newsletter_html = utility.inline_css(newsletter_html, "static")
94 | newsletter_html = utility.inline_svg_images(newsletter_html, "static")
95 | logger.info("content updated")
96 | item = save_to_db(newsletter_html, "weekly")
97 | logger.info(f"saved to db, sending email")
98 | await send_email(content=item["content"])
99 | logger.info(f"email sent")
100 |
101 |
102 | def save_to_db(content, content_type):
103 | try:
104 | item = {
105 | "item_name": "newsletter",
106 | "type": content_type,
107 | "content": content,
108 | "created": utility.get_formatted_timestamp()
109 | }
110 |
111 | item_id = utility.generate_deterministic_id(item, key_fields=["item_name", "type"], prefix="nl")
112 | item["newsletterId"] = item_id
113 | dynamo.add_item("newsletter", "newsletterId", item, False)
114 | return item
115 | except Exception as e:
116 | logging.info("Error saving to dynamo db", e)
117 |
118 |
119 | async def send_email(content=None, template_id=None, recipients=subscribers):
120 | email_service = EmailService(
121 | recipients=recipients,
122 | body_text = content,
123 | template_id=template_id
124 | )
125 | result = email_service.send_email()
126 | return result
127 |
--------------------------------------------------------------------------------
/services/email_service.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import configparser
3 | from typing import List, Optional
4 | from sendgrid import SendGridAPIClient
5 | from sendgrid.helpers.mail import Mail, Content
6 |
7 |
8 | config = configparser.ConfigParser()
9 | config.read('db_handler/vault/secrets.ini')
10 | api_key = config["Sendgrid"]["api_key"]
11 |
12 | class EmailService:
13 | def __init__(self, recipients: Optional[List[str]] = None,
14 | subject: Optional[str] = None,
15 | body_text: Optional[str] = None,
16 | template_id: Optional[str] = None):
17 | self.sender = "weekly@ailert.tech"
18 | self.recipients = recipients if recipients else []
19 | self.subject = subject if subject else "Weekly Newsletter"
20 | self.charset = "UTF-8"
21 | self.body_text = body_text
22 | self.template_id = template_id
23 |
24 | # Initialize SendGrid client
25 | try:
26 | self.sg_client = SendGridAPIClient(api_key=api_key)
27 | except Exception as e:
28 | logging.error(f"Failed to initialize SendGrid client: {str(e)}")
29 | raise
30 |
31 | def _create_mail_object(self, recipient: str) -> Mail:
32 | """Create a Mail object for a single recipient"""
33 | from_email = self.sender
34 | to_email = recipient
35 |
36 | mail = Mail(
37 | from_email=from_email,
38 | to_emails=to_email,
39 | subject=self.subject,
40 | html_content=self.body_text
41 | )
42 |
43 | # if self.template_id:
44 | # mail.template_id = self.template_id
45 | # else:
46 | # content = Content("text/html", self.body_text)
47 | # mail.content = [content]
48 |
49 | return mail
50 |
51 | def send_email(self) -> dict:
52 | """
53 | Send emails to all recipients using SendGrid
54 | Returns:
55 | dict: Status of email sending operation
56 | """
57 | if not self.recipients:
58 | return {
59 | "status": "error",
60 | "message": "No recipients specified",
61 | "failed_recipients": []
62 | }
63 |
64 | failed_recipients = []
65 | successful_count = 0
66 |
67 | for recipient in self.recipients:
68 | try:
69 | mail = self._create_mail_object(recipient)
70 | response = self.sg_client.send(mail)
71 |
72 | if response.status_code in [200, 201, 202]:
73 | successful_count += 1
74 | logging.info(f"Email sent successfully to {recipient}")
75 | else:
76 | failed_recipients.append({
77 | "email": recipient,
78 | "error": f"SendGrid API returned status code: {response.status_code}"
79 | })
80 | logging.error(f"Failed to send email to {recipient}. Status code: {response.status_code}")
81 |
82 | except Exception as e:
83 | failed_recipients.append({
84 | "email": recipient,
85 | "error": str(e)
86 | })
87 | logging.error(f"Exception while sending email to {recipient}: {str(e)}")
88 |
89 | status = "success" if not failed_recipients else "partial_success" if successful_count else "error"
90 |
91 | return {
92 | "status": status,
93 | "message": f"Successfully sent {successful_count} out of {len(self.recipients)} emails",
94 | "failed_recipients": failed_recipients
95 | }
96 |
97 | def add_recipient(self, recipient: str) -> None:
98 | """Add a single recipient to the email list"""
99 | if recipient not in self.recipients:
100 | self.recipients.append(recipient)
101 |
102 | def add_recipients(self, recipients: List[str]) -> None:
103 | """Add multiple recipients to the email list"""
104 | for recipient in recipients:
105 | self.add_recipient(recipient)
106 |
107 | def set_template_id(self, template_id: str) -> None:
108 | """Set the SendGrid template ID"""
109 | self.template_id = template_id
110 |
111 | def set_body_text(self, body_text: str) -> None:
112 | """Set the email body text"""
113 | self.body_text = body_text
114 |
115 | def set_subject(self, subject: str) -> None:
116 | """Set the email subject"""
117 | self.subject = subject
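118 | 
119 | # Example (sketch): how app/main.py drives this service.
120 | #
121 | #     service = EmailService(recipients=["add-your-email@test.com"],
122 | #                            body_text="<h1>newsletter html</h1>")
123 | #     result = service.send_email()
124 | #     print(result["status"], result["message"])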
--------------------------------------------------------------------------------
/static/style.css:
--------------------------------------------------------------------------------
1 | /* Base Styles */
2 | body {
3 | font-family: 'Segoe UI', -apple-system, BlinkMacSystemFont, sans-serif;
4 | line-height: 1.4;
5 | margin: 0;
6 | padding: 0;
7 | background-color: #f0f2f5;
8 | color: #2d3748;
9 | }
10 |
11 | .container {
12 | max-width: 600px;
13 | margin: 0 auto;
14 | background-color: white;
15 | box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
16 | }
17 |
18 | /* Top Navigation */
19 | .top-nav {
20 | padding: 4px 0;
21 | text-align: right;
22 | margin-right: 12px;
23 | margin-bottom: 2px;
24 | }
25 |
26 | .top-nav a {
27 | color: #2c3e50;
28 | text-decoration: none;
29 | padding: 3px 8px;
30 | margin: 0 2px;
31 | font-size: 10px;
32 | border-radius: 12px;
33 | border: 1px solid #e5e7eb;
34 | }
35 |
36 | /* Header Styles */
37 | .header {
38 | background: linear-gradient(135deg, #6366f1 0%, #8b5cf6 100%);
39 | color: white;
40 | padding: 25px;
41 | border-radius: 12px;
42 | margin: 4px 12px 12px 12px;
43 | }
44 |
45 | .header-top {
46 | display: flex;
47 | align-items: center;
48 | margin-bottom: 8px;
49 | }
50 |
51 | .header-content {
52 | position: relative;
53 | z-index: 1;
54 | padding-right: 60px;
55 | }
56 |
57 | .logo {
58 | color: #ffffff;
59 |     background: rgb(255, 255, 255);
60 | width: 40px;
61 | height: 40px;
62 | border-radius: 8px;
63 | margin-right: 15px;
64 | }
65 |
66 | .brand-name {
67 | font-size: 24px;
68 | font-weight: 700;
69 | letter-spacing: -0.5px;
70 | }
71 |
72 | .header h2 {
73 | font-size: 24px;
74 | font-weight: 400;
75 | margin: 0 0 12px 0;
76 | line-height: 1.3;
77 | }
78 |
79 | .header p {
80 | font-size: 16px;
81 | line-height: 1.4;
82 | margin: 0;
83 | opacity: 0.9;
84 | }
85 |
86 | /* Content Sections */
87 | .section {
88 | margin: 12px;
89 | padding: 16px;
90 | border-radius: 8px;
91 | background: white;
92 | box-shadow: 0 1px 2px rgba(0, 0, 0, 0.04);
93 | }
94 |
95 | .summary-section {
96 | background: linear-gradient(135deg, #e0e7ff 0%, #f0f7ff 100%);
97 | }
98 |
99 | .section-title {
100 | color: #4f46e5;
101 | font-size: 18px;
102 | font-weight: 700;
103 | margin-bottom: 12px;
104 | padding-bottom: 8px;
105 | border-bottom: 1px solid #b0b8e6;
106 | }
107 |
108 | /* News Items */
109 | .news-item {
110 | padding: 12px;
111 | margin-bottom: 12px;
112 | background: white;
113 | border-radius: 6px;
114 | border: 1px solid #e5e7eb;
115 | }
116 |
117 | .news-title {
118 | color: #4338ca;
119 | font-size: 16px;
120 | font-weight: 600;
121 | margin-bottom: 6px;
122 | }
123 |
124 | .news-item p {
125 | margin: 0 0 8px 0;
126 | font-size: 14px;
127 | line-height: 1.4;
128 | }
129 |
130 | /* Trending Button */
131 | .trending-button {
132 | display: inline-flex;
133 | align-items: center;
134 | background: linear-gradient(135deg, #f0f7ff 0%, #e0e7ff 100%);
135 | padding: 4px 10px;
136 | border-radius: 12px;
137 | font-size: 12px;
138 | color: #4338ca;
139 | margin-top: 8px;
140 | }
141 |
142 | .trending-button i {
143 | margin-right: 6px;
144 | color: #6366f1;
145 | }
146 |
147 | /* Share Section */
148 | .share-section {
149 | /* background: linear-gradient(135deg, #818cf8 0%, #6366f1 100%);*/
150 | background: linear-gradient(135deg, #f0f7ff 0%, #e0e7ff 100%);
151 | color: black;
152 | text-align: center;
153 | padding: 20px 16px;
154 | }
155 |
156 | .share-button {
157 | padding: 8px 16px;
158 | font-size: 13px;
159 | border-radius: 16px;
160 | margin: 6px;
161 | }
162 |
163 | /* Feedback Section */
164 | .feedback-section {
165 | background: linear-gradient(135deg, #f0f7ff 0%, #e0e7ff 100%);
166 | text-align: center;
167 | padding: 20px 16px;
168 | }
169 |
170 | .feedback-button {
171 | padding: 8px 16px;
172 | border-radius: 16px;
173 | font-size: 13px;
174 | margin: 0 6px;
175 | }
176 |
177 | /* Read Time */
178 | .read-time {
179 | display: inline-flex;
180 | align-items: center;
181 | padding: 4px 10px;
182 | border-radius: 12px;
183 | margin-top: 8px;
184 | font-size: 12px;
185 | }
186 |
187 | /* Footer */
188 | .footer {
189 | background: linear-gradient(135deg, #4338ca 0%, #3730a3 100%);
190 | color: white;
191 | padding: 20px 16px;
192 | text-align: center;
193 | font-size: 12px;
194 | }
195 |
196 | .footer a {
197 | padding: 0 8px;
198 | }
199 |
200 | /* Responsive Design */
201 | @media (max-width: 600px) {
202 | .section {
203 | margin: 8px;
204 | padding: 12px;
205 | }
206 |
207 | .header {
208 | padding: 20px;
209 | margin: 4px 8px 8px 8px;
210 | }
211 |
212 | .header h2 {
213 | font-size: 20px;
214 | }
215 |
216 | .news-item {
217 | padding: 10px;
218 | margin-bottom: 10px;
219 | }
220 | }
--------------------------------------------------------------------------------
/db_handler/db.py:
--------------------------------------------------------------------------------
1 | """
2 | Database support functions.
3 | The idea is that none of the individual scripts deal directly with the file system.
4 | Any of the file system I/O and the associated settings are in this single file.
5 | """
6 |
7 | import os
8 | import sqlite3, zlib, pickle, tempfile
9 | from sqlitedict import SqliteDict
10 | from contextlib import contextmanager
11 |
12 |
13 | DATA_DIR = 'data'
14 |
15 | @contextmanager
16 | def _tempfile(*args, **kws):
17 | """ Context for temporary file.
18 | Will find a free temporary filename upon entering
19 | and will try to delete the file on leaving
20 | Parameters
21 | ----------
22 | suffix : string
23 | optional file suffix
24 | """
25 |
26 | fd, name = tempfile.mkstemp(*args, **kws)
27 | os.close(fd)
28 | try:
29 | yield name
30 | finally:
31 | try:
32 | os.remove(name)
33 | except OSError as e:
34 | if e.errno == 2:
35 | pass
36 | else:
37 | raise e
38 |
39 |
40 | @contextmanager
41 | def open_atomic(filepath, *args, **kwargs):
42 | """ Open temporary file object that atomically moves to destination upon
43 | exiting.
44 | Allows reading and writing to and from the same filename.
45 | Parameters
46 | ----------
47 | filepath : string
48 | the file path to be opened
49 | fsync : bool
50 | whether to force write the file to disk
51 | kwargs : mixed
52 | Any valid keyword arguments for :code:`open`
53 | """
54 | fsync = kwargs.pop('fsync', False)
55 |
56 | with _tempfile(dir=os.path.dirname(filepath)) as tmppath:
57 | with open(tmppath, *args, **kwargs) as f:
58 | yield f
59 | if fsync:
60 | f.flush()
61 | os.fsync(f.fileno())
62 | os.rename(tmppath, filepath)
63 |
64 | def safe_pickle_dump(obj, fname):
65 | """
66 | prevents a case where one process could be writing a pickle file
67 | while another process is reading it, causing a crash. the solution
68 | is to write the pickle file to a temporary file and then move it.
69 | """
70 | with open_atomic(fname, 'wb') as f:
71 | pickle.dump(obj, f, -1) # -1 specifies highest binary protocol
72 |
73 | # -----------------------------------------------------------------------------
74 |
75 | class CompressedSqliteDict(SqliteDict):
76 | """ overrides the encode/decode methods to use zlib, so we get compressed storage """
77 |
78 | def __init__(self, *args, **kwargs):
79 |
80 | def encode(obj):
81 | return sqlite3.Binary(zlib.compress(pickle.dumps(obj, pickle.HIGHEST_PROTOCOL)))
82 |
83 | def decode(obj):
84 | return pickle.loads(zlib.decompress(bytes(obj)))
85 |
86 | super().__init__(*args, **kwargs, encode=encode, decode=decode)
87 |
88 | # -----------------------------------------------------------------------------
89 | """
90 | some docs to self:
91 | flag='c': default mode, open for read/write, and creating the db/table if necessary
92 | flag='r': open for read-only
93 | """
94 |
95 | # stores info about papers, and also their lighter-weight metadata
96 | PAPERS_DB_FILE = os.path.join(DATA_DIR, 'papers.db')
97 | # stores account-relevant info, like which tags exist for which papers
98 | DICT_DB_FILE = os.path.join(DATA_DIR, 'dict.db')
99 |
100 | def get_papers_db(flag='r', autocommit=True):
101 | assert flag in ['r', 'c']
102 | pdb = CompressedSqliteDict(PAPERS_DB_FILE, tablename='papers', flag=flag, autocommit=autocommit)
103 | return pdb
104 |
105 | def get_metas_db(flag='r', autocommit=True):
106 | assert flag in ['r', 'c']
107 | mdb = SqliteDict(PAPERS_DB_FILE, tablename='metas', flag=flag, autocommit=autocommit)
108 | return mdb
109 |
110 | def get_tags_db(flag='r', autocommit=True):
111 | assert flag in ['r', 'c']
112 | tdb = CompressedSqliteDict(DICT_DB_FILE, tablename='tags', flag=flag, autocommit=autocommit)
113 | return tdb
114 |
115 | def get_last_active_db(flag='r', autocommit=True):
116 | assert flag in ['r', 'c']
117 | ladb = SqliteDict(DICT_DB_FILE, tablename='last_active', flag=flag, autocommit=autocommit)
118 | return ladb
119 |
120 | def get_email_db(flag='r', autocommit=True):
121 | assert flag in ['r', 'c']
122 | edb = SqliteDict(DICT_DB_FILE, tablename='email', flag=flag, autocommit=autocommit)
123 | return edb
124 |
125 | # -----------------------------------------------------------------------------
126 | """
127 | our "feature store" is currently just a pickle file, may want to consider hdf5 in the future
128 | """
129 |
130 | # stores tfidf features and a bunch of other metadata
131 | FEATURES_FILE = os.path.join(DATA_DIR, 'features.p')
132 |
133 | def save_features(features):
134 | """ takes the features dict and save it to disk in a simple pickle file """
135 | safe_pickle_dump(features, FEATURES_FILE)
136 |
137 | def load_features():
138 | """ loads the features dict from disk """
139 | with open(FEATURES_FILE, 'rb') as f:
140 | features = pickle.load(f)
141 | return features
142 |
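143 | # Example (sketch) of the helpers above; 'paper-id' is an illustrative key:
144 | #
145 | #     pdb = get_papers_db(flag='c')           # 'c' creates the db/table if needed
146 | #     pdb['paper-id'] = {'title': 'A Paper'}  # stored compressed via zlib+pickle
147 | #     print(get_papers_db(flag='r')['paper-id'])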
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # AiLert 
2 |
3 | An open-source AI newsletter platform that aggregates and curates AI content from across the internet.
4 |
5 | ## Overview
6 | AiLert automatically aggregates content from 150+ sources including research papers, news sites, GitHub repositories, and events to create customizable AI newsletters. Built with Python and powered by AWS, it helps communities and teams stay updated with the latest in AI.
7 |
8 | ## Features
9 | - 📚 Multi-source aggregation (150+ sources)
10 | - 🎯 Smart content categorization
11 | - 📊 Engagement tracking
12 | - ⚡ Async content processing
13 | - 📧 Customizable newsletter templates
14 | - 📅 Daily and weekly digest options
15 |
16 | ## Content Sources
17 | - Research Papers (arXiv)
18 | - Industry News (RSS feeds)
19 | - GitHub Trending Repositories
20 | - AI Competitions & Events
21 | - Product Launches
22 | - Technical Blogs
23 |
24 | ## Tech Stack
25 | - Python 3.10+ (the pydantic models use `X | None` union syntax)
26 | - Flask
27 | - AWS DynamoDB
28 | - BeautifulSoup4
29 | - Feedparser
30 | - Schedule
31 | - Pydantic
32 | - uvicorn
33 |
34 | ## 📫 How to Subscribe
35 |
36 | 1. Visit https://ailert.tech
37 | 2. Navigate to the newsletter section
38 | 3. Enter your email address
39 | 4. Confirm your subscription
40 |
41 | ## ✨ What Our Readers Say
42 |
43 | `"AIlert's newsletter helps me stay on top of AI developments without getting overwhelmed" - Tech Lead at Fortune 500`
44 |
45 |
46 | `"The perfect blend of technical depth and practical insights" - AI Researcher`
47 |
48 | ## 🔒 Your Privacy Matters
49 |
50 | - No spam, ever
51 | - Unsubscribe anytime
52 | - Your data is never shared or sold
53 |
54 | ## 📅 Publication Schedule
55 | Receive our carefully curated insights every week, delivered straight to your inbox.
56 |
57 | ## Installation
58 |
59 | 1. Clone the repository:
60 | ```bash
61 | git clone https://github.com/yourusername/ailert.git
62 | cd ailert
63 | ```
64 |
65 | 2. Install dependencies:
66 | ```bash
67 | pip install -r requirements.txt
68 | ```
69 |
70 | 3. Set up AWS credentials:
71 | ```bash
72 | export AWS_ACCESS_KEY_ID="your_access_key"
73 | export AWS_SECRET_ACCESS_KEY="your_secret_key"
74 | export AWS_REGION="your_region"
75 | ```
76 |
77 | 4. Run the application:
78 | ```bash
79 | python launch.py
80 | ```
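81 | 
82 | Alternatively, build and run with Docker (a sketch based on the included Dockerfile, which exposes port 5000 and reads credentials from a mounted vault directory; the image tag is up to you):
83 | ```bash
84 | docker build -t ailert .
85 | docker run -p 5000:5000 -v "$(pwd)/db_handler/vault:/app/db_handler/vault" ailert
86 | ```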
81 |
82 | ## Project Structure
83 | ```
84 | ailert/
85 | ├── builder/ # Newsletter generation
86 | ├── db_handler/ # Db operations manager
87 | ├── app/ # Core functions of the application
88 | ├── router/ # REST Api routes
89 | ├── services/ # Content aggregation services
90 | ├── static/ # Templates and assets
91 | ├── utils/ # Application common utilities
92 | ├── launch.py        # Flask application entry point
93 | └── requirements.txt # Dependencies
94 | ```
95 |
96 | ## Contributing
97 | We welcome contributions of all kinds! Here are some ways you can help:
98 |
99 | ### Development
100 | - Add new content sources
101 | - Improve content categorization
102 | - Optimize performance
103 | - Add new features
104 | - Fix bugs
105 | - Write tests
106 |
107 | ### Documentation
108 | - Improve technical docs
109 | - Write tutorials
110 | - Add code comments
111 | - Create examples
112 |
113 | ### Design
114 | - Improve newsletter templates
115 | - Create visual assets
116 | - Enhance UI/UX
117 |
118 | ### Content
119 | - Add new RSS feeds
120 | - Improve content filtering
121 | - Suggest new features
122 |
123 | ## Getting Started with Contributing
124 |
125 | 1. Fork the repository
126 | 2. Create a new branch
127 | ```bash
128 | git checkout -b feature/your-feature
129 | ```
130 | 3. Make your changes
131 | 4. Write or update tests
132 | 5. Submit a pull request
133 |
134 | ### Development Setup
135 | 1. Install development dependencies:
136 | ```bash
137 | pip install -r requirements-dev.txt
138 | ```
139 |
140 | 2. Run tests:
141 | ```bash
142 | python -m pytest
143 | ```
144 |
145 | ## API Documentation
146 |
147 | ### Newsletter Builder
148 | ```python
149 | from builder.builder import NewsletterBuilder
150 |
151 | # Create daily newsletter
152 | daily = NewsletterBuilder({
153 | "gh_url": "github_url",
154 | "gh_ftype": "daily"
155 | })
156 | daily.set_sections(["news"])
157 | content = await daily.section_generator()
158 | ```
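159 | 
160 | To render the final HTML, pass the generated sections to `build()` (as `app/main.py` does; note that in this repo the builder also takes a DynamoDB handle as a second constructor argument):
161 | ```python
162 | newsletter_html = await daily.build(content)
163 | ```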
159 |
160 | ### Content Services
161 | Each service handles different content types:
162 | - `NewsService`: Industry news
163 | - `ResearchService`: Research papers
164 | - `GitHubScanner`: Trending repositories
165 | - `ProductService`: New AI products
166 | - `CompetitionService`: AI competitions
167 | - `EventsService`: Upcoming events
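168 | 
169 | A minimal usage sketch (run inside an event loop; `CompetitionService()` takes no arguments and its async getter comes from `services/competition_service.py`):
170 | ```python
171 | import asyncio
172 | from services import CompetitionService
173 | 
174 | async def preview():
175 |     for comp in await CompetitionService().get_latest_competitions():
176 |         print(comp.name, comp.deadline, comp.reward)
177 | 
178 | asyncio.run(preview())
179 | ```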
168 |
169 | ## License
170 | This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
171 |
172 | ## Acknowledgments
173 | - All our amazing contributors
174 | - The open-source community
175 | - RSS feed providers
176 | - Content creators
177 |
178 | ## Contact
179 | - Create an issue for bug reports
180 | - Start a discussion for feature requests
181 | - Join our Discord community [link]
182 |
183 | ## Roadmap
184 | - [ ] Add more content sources
185 | - [ ] Implement ML-based content ranking
186 | - [ ] Add personalization options
187 | - [ ] Create API endpoints
188 | - [ ] Add email delivery system
189 | - [ ] Improve template customization
190 |
191 | ---
192 | Built with ❤️ for the AI community
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Contributor Covenant Code of Conduct
2 |
3 | ## Our Pledge
4 |
5 | We as members, contributors, and leaders pledge to make participation in our
6 | community a harassment-free experience for everyone, regardless of age, body
7 | size, visible or invisible disability, ethnicity, sex characteristics, gender
8 | identity and expression, level of experience, education, socio-economic status,
9 | nationality, personal appearance, race, religion, or sexual identity
10 | and orientation.
11 |
12 | We pledge to act and interact in ways that contribute to an open, welcoming,
13 | diverse, inclusive, and healthy community.
14 |
15 | ## Our Standards
16 |
17 | Examples of behavior that contributes to a positive environment for our
18 | community include:
19 |
20 | * Demonstrating empathy and kindness toward other people
21 | * Being respectful of differing opinions, viewpoints, and experiences
22 | * Giving and gracefully accepting constructive feedback
23 | * Accepting responsibility and apologizing to those affected by our mistakes,
24 | and learning from the experience
25 | * Focusing on what is best not just for us as individuals, but for the
26 | overall community
27 |
28 | Examples of unacceptable behavior include:
29 |
30 | * The use of sexualized language or imagery, and sexual attention or
31 | advances of any kind
32 | * Trolling, insulting or derogatory comments, and personal or political attacks
33 | * Public or private harassment
34 | * Publishing others' private information, such as a physical or email
35 | address, without their explicit permission
36 | * Other conduct which could reasonably be considered inappropriate in a
37 | professional setting
38 |
39 | ## Enforcement Responsibilities
40 |
41 | Community leaders are responsible for clarifying and enforcing our standards of
42 | acceptable behavior and will take appropriate and fair corrective action in
43 | response to any behavior that they deem inappropriate, threatening, offensive,
44 | or harmful.
45 |
46 | Community leaders have the right and responsibility to remove, edit, or reject
47 | comments, commits, code, wiki edits, issues, and other contributions that are
48 | not aligned to this Code of Conduct, and will communicate reasons for moderation
49 | decisions when appropriate.
50 |
51 | ## Scope
52 |
53 | This Code of Conduct applies within all community spaces, and also applies when
54 | an individual is officially representing the community in public spaces.
55 | Examples of representing our community include using an official e-mail address,
56 | posting via an official social media account, or acting as an appointed
57 | representative at an online or offline event.
58 |
59 | ## Enforcement
60 |
61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
62 | reported to the community leaders responsible for enforcement.
63 | All complaints will be reviewed and investigated promptly and fairly.
64 |
65 | All community leaders are obligated to respect the privacy and security of the
66 | reporter of any incident.
67 |
68 | ## Enforcement Guidelines
69 |
70 | Community leaders will follow these Community Impact Guidelines in determining
71 | the consequences for any action they deem in violation of this Code of Conduct:
72 |
73 | ### 1. Correction
74 |
75 | **Community Impact**: Use of inappropriate language or other behavior deemed
76 | unprofessional or unwelcome in the community.
77 |
78 | **Consequence**: A private, written warning from community leaders, providing
79 | clarity around the nature of the violation and an explanation of why the
80 | behavior was inappropriate. A public apology may be requested.
81 |
82 | ### 2. Warning
83 |
84 | **Community Impact**: A violation through a single incident or series
85 | of actions.
86 |
87 | **Consequence**: A warning with consequences for continued behavior. No
88 | interaction with the people involved, including unsolicited interaction with
89 | those enforcing the Code of Conduct, for a specified period of time. This
90 | includes avoiding interactions in community spaces as well as external channels
91 | like social media. Violating these terms may lead to a temporary or
92 | permanent ban.
93 |
94 | ### 3. Temporary Ban
95 |
96 | **Community Impact**: A serious violation of community standards, including
97 | sustained inappropriate behavior.
98 |
99 | **Consequence**: A temporary ban from any sort of interaction or public
100 | communication with the community for a specified period of time. No public or
101 | private interaction with the people involved, including unsolicited interaction
102 | with those enforcing the Code of Conduct, is allowed during this period.
103 | Violating these terms may lead to a permanent ban.
104 |
105 | ### 4. Permanent Ban
106 |
107 | **Community Impact**: Demonstrating a pattern of violation of community
108 | standards, including sustained inappropriate behavior, harassment of an
109 | individual, or aggression toward or disparagement of classes of individuals.
110 |
111 | **Consequence**: A permanent ban from any sort of public interaction within
112 | the community.
113 |
114 | ## Attribution
115 |
116 | This Code of Conduct is adapted from the [Contributor Covenant][homepage],
117 | version 2.0, available at
118 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.
119 |
120 | Community Impact Guidelines were inspired by [Mozilla's code of conduct
121 | enforcement ladder](https://github.com/mozilla/diversity).
122 |
123 | [homepage]: https://www.contributor-covenant.org
124 |
125 | For answers to common questions about this code of conduct, see the FAQ at
126 | https://www.contributor-covenant.org/faq. Translations are available at
127 | https://www.contributor-covenant.org/translations.
128 |
--------------------------------------------------------------------------------
/utils/utility.py:
--------------------------------------------------------------------------------
1 | import os
2 | import re
3 | import csv
4 | import hashlib
5 | import logging
6 | from pathlib import Path
7 | from datetime import datetime
8 | from typing import Any, Dict, List, Optional
9 |
10 |
11 | def load_template(template_path="static/newsletter.html") -> str:
12 | with open(template_path, 'r') as f:
13 | return f.read()
14 |
15 | def generate_deterministic_id(item: Dict[str, Any], key_fields: List[str], prefix: str = "item") -> str:
16 | """
17 | Example:
18 | item = {
19 | "product_name": "Widget",
20 | "color": "blue",
21 | "timestamp": "2024-01-01"
22 | }
23 | id = generate_deterministic_id(
24 | item,
25 | key_fields=["product_name", "color"],
26 | prefix="prod"
27 | )
28 | # Result: prod-a1b2c3d4...
29 | """
30 |     sorted_fields = sorted(key_fields)  # sort a copy; don't mutate the caller's list
31 |     values = []
32 |     for field in sorted_fields:
33 | if field not in item:
34 | raise KeyError(f"Required field '{field}' not found in item")
35 | value = item[field]
36 | values.append(str(value))
37 |
38 | combined_string = "||".join(values)
39 | hash_object = hashlib.sha256(combined_string.encode())
40 | hash_hex = hash_object.hexdigest()
41 | short_hash = hash_hex[:12]
42 | return f"{prefix}-{short_hash}"
43 |
44 | def truncate_text(text: str, max_length: int = 200) -> str:
45 | """Truncate text to specified length at the nearest word boundary."""
46 | if len(text) <= max_length:
47 | return text
48 | truncated = text[:max_length].rsplit(' ', 1)[0]
49 | return truncated.rstrip('.,!?:;')
50 |
51 | def get_formatted_timestamp():
52 | """Get current timestamp in YYYY-MM-DD format"""
53 | return datetime.now().strftime("%Y-%m-%d")
54 |
55 |
56 | def is_valid_email(email):
57 | """Validate email format"""
58 | pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'
59 | return re.match(pattern, email) is not None
60 |
61 |
62 | def save_to_csv(email):
63 | csv_file = 'db_handler/vault/recipients.csv'
64 | file_exists = os.path.exists(csv_file)
65 |
66 | try:
67 | with open(csv_file, 'a', newline='') as file:
68 | writer = csv.writer(file)
69 | if not file_exists:
70 | writer.writerow(['email', 'subscribed_at'])
71 | writer.writerow([email, get_formatted_timestamp()])
72 | return True
73 | except Exception as e:
74 | logging.error(f"Error saving to CSV: {str(e)}")
75 | return False
76 |
77 |
78 | def is_email_subscribed(email):
79 | """Check if email already exists in CSV"""
80 | csv_file = 'db_handler/vault/recipients.csv'
81 | if not os.path.exists(csv_file):
82 | return False
83 |
84 | try:
85 | with open(csv_file, 'r') as file:
86 | reader = csv.reader(file)
87 |             next(reader, None)  # skip header; None guards an empty file
88 |             return any(row and row[0] == email for row in reader)
89 | except Exception as e:
90 | logging.error(f"Error checking subscription: {str(e)}")
91 | return False
92 |
93 |
94 | def inline_css(html_content: str, css_path: Optional[str] = None) -> str:
95 | """Replace CSS link tags with the actual CSS content in the HTML string."""
96 |     css_link_pattern = r'<link[^>]+rel="stylesheet"[^>]+href="([^"]+)"[^>]*>'
97 |
98 | def replace_css_link(match):
99 | css_file = match.group(1)
100 |
101 | # If css_path is provided, use it, otherwise look in current directory
102 | if css_path:
103 | css_file_path = Path(css_path) / Path(css_file).name
104 | else:
105 | css_file_path = Path(css_file)
106 |
107 | try:
108 | with open(css_file_path, 'r', encoding='utf-8') as f:
109 | css_content = f.read()
110 |             return f'<style>\n{css_content}\n</style>'
111 | except FileNotFoundError:
112 |             logging.warning(f"CSS file not found: {css_file_path}")
113 | return match.group(0) # Keep original link tag if file not found
114 | except Exception as e:
115 |             logging.error(f"Error reading CSS file: {e}")
116 | return match.group(0)
117 |
118 | # Replace all CSS link tags with style tags
119 | return re.sub(css_link_pattern, replace_css_link, html_content)
120 |
121 |
122 | def inline_svg_images(html_content: str, svg_path: Optional[str] = None) -> str:
123 | """Replace SVG image tags with the actual SVG content in the HTML string."""
124 |     img_pattern = r'<img[^>]+src="([^"]+\.svg)"[^>]*>'
125 |
126 | def replace_img_tag(match):
127 | # Get the full img tag and the src value
128 | img_tag = match.group(0)
129 | svg_file = match.group(1)
130 |
131 | # Extract the class and alt attributes if they exist
132 | class_match = re.search(r'class="([^"]+)"', img_tag)
133 | alt_match = re.search(r'alt="([^"]+)"', img_tag)
134 |
135 | class_attr = f' class="{class_match.group(1)}"' if class_match else ''
136 | alt_attr = f' aria-label="{alt_match.group(1)}"' if alt_match else ''
137 |
138 | # If svg_path is provided, use it, otherwise look in current directory
139 | if svg_path:
140 | svg_file_path = Path(svg_path) / Path(svg_file).name
141 | else:
142 | svg_file_path = Path(svg_file)
143 |
144 | try:
145 | with open(svg_file_path, 'r', encoding='utf-8') as f:
146 | svg_content = f.read()
147 |             svg_content = svg_content.replace('<svg', f'<svg{class_attr}{alt_attr}', 1)  # inject preserved attrs into the root tag