├── .circleci └── config.yml ├── .gitignore ├── .gitmodules ├── .mypy.ini ├── COPYING ├── Dockerfiles ├── circleci-env.yml ├── dev-env.yml ├── front ├── prod-example.yml ├── pythonbase ├── server └── worker ├── INSTALL.md ├── Makefile ├── Pipfile ├── Pipfile.lock ├── README.md ├── SECURITY.md ├── ep_celery.py ├── example_conf ├── gunicorn.py ├── jarr.circleci.json ├── jarr.json ├── jarr.test.json └── logging.ini ├── jarr ├── __init__.py ├── api │ ├── __init__.py │ ├── auth.py │ ├── category.py │ ├── cluster.py │ ├── common.py │ ├── feed.py │ ├── metrics.py │ ├── oauth.py │ ├── one_page_app.py │ ├── opml.py │ └── user.py ├── bootstrap.py ├── controllers │ ├── __init__.py │ ├── abstract.py │ ├── article.py │ ├── article_clusterizer.py │ ├── category.py │ ├── cluster.py │ ├── feed.py │ ├── feed_builder.py │ ├── icon.py │ └── user.py ├── crawler │ ├── article_builders │ │ ├── __init__.py │ │ ├── abstract.py │ │ ├── classic.py │ │ ├── json.py │ │ ├── koreus.py │ │ ├── reddit.py │ │ └── rss_bridge.py │ ├── crawlers │ │ ├── __init__.py │ │ ├── abstract.py │ │ ├── classic.py │ │ ├── json.py │ │ ├── koreus.py │ │ ├── reddit.py │ │ ├── rss_bridge.py │ │ └── tumblr.py │ ├── lib │ │ ├── __init__.py │ │ ├── feedparser_utils.py │ │ └── headers_handling.py │ ├── main.py │ ├── requests_utils.py │ └── utils.py ├── lib │ ├── __init__.py │ ├── clustering_af │ │ ├── __init__.py │ │ ├── extra_stopwords.py │ │ ├── grouper.py │ │ ├── postgres_casting.py │ │ └── vector.py │ ├── const.py │ ├── content_generator.py │ ├── emails.py │ ├── enums.py │ ├── filter.py │ ├── html_parsing.py │ ├── url_cleaners.py │ └── utils.py ├── metaconf.yml ├── metrics.py ├── models │ ├── __init__.py │ ├── article.py │ ├── category.py │ ├── cluster.py │ ├── feed.py │ ├── icon.py │ ├── user.py │ └── utc_datetime_type.py ├── signals.py ├── static │ └── img │ │ ├── favicon.ico │ │ ├── pinboard.png │ │ ├── readability.png │ │ ├── reddit.png │ │ └── twitter.png └── templates │ ├── mail_password_recovery.txt │ └── 
opml.xml ├── jsclient ├── .gitignore ├── package.json ├── public │ ├── favicon.ico │ ├── index.html │ ├── manifest.json │ └── robots.txt ├── src │ ├── Jarr.js │ ├── Jarr.styles.js │ ├── Jarr.theme.js │ ├── app │ │ └── store.js │ ├── authSlice.js │ ├── components │ │ ├── ClusterIcon.js │ │ ├── FeedIcon.js │ │ └── JarrIcon.gif │ ├── const.js │ ├── features │ │ ├── clusterlist │ │ │ ├── ClusterList.js │ │ │ ├── components │ │ │ │ ├── Article.js │ │ │ │ ├── Articles.js │ │ │ │ ├── Cluster.js │ │ │ │ ├── ProcessedContent.js │ │ │ │ ├── SelectedObjCard.js │ │ │ │ ├── TypedContents.js │ │ │ │ └── style.js │ │ │ └── slice.js │ │ ├── editpanel │ │ │ ├── Category │ │ │ │ └── index.js │ │ │ ├── EditPanel.js │ │ │ ├── Feed │ │ │ │ ├── Build.js │ │ │ │ ├── FilterSettings │ │ │ │ │ ├── Line.js │ │ │ │ │ ├── index.js │ │ │ │ │ └── style.js │ │ │ │ ├── ProposedLinks.js │ │ │ │ └── index.js │ │ │ ├── SettingsPanel.js │ │ │ ├── common │ │ │ │ ├── ClusterSettings.js │ │ │ │ ├── DeleteButton.js │ │ │ │ └── StateTextInput │ │ │ │ │ ├── index.js │ │ │ │ │ └── style.js │ │ │ ├── editPanelStyle.js │ │ │ └── slice.js │ │ ├── feedlist │ │ │ ├── FeedList.js │ │ │ ├── FeedRow.js │ │ │ ├── feedListStyle.js │ │ │ └── slice.js │ │ ├── noauth │ │ │ ├── NoAuth.js │ │ │ ├── components │ │ │ │ ├── InitPasswordRecovery.js │ │ │ │ ├── Login.js │ │ │ │ ├── OAuthLogin.js │ │ │ │ ├── PasswordRecovery.js │ │ │ │ ├── SignUp.js │ │ │ │ └── style.js │ │ │ └── noAuthSlice.js │ │ └── topmenu │ │ │ ├── TopMenu.js │ │ │ └── topMenuStyle.js │ ├── hooks │ │ ├── doBuildFeed.js │ │ ├── doCreateObj.js │ │ ├── doDeleteObj.js │ │ ├── doEditCluster.js │ │ ├── doEditObj.js │ │ ├── doFetchCluster.js │ │ ├── doFetchFeeds.js │ │ ├── doFetchObjForEdit.js │ │ ├── doFetchUnreadCount.js │ │ ├── doListClusters.js │ │ ├── doLoadMoreClusters.js │ │ └── doMarkAllAsRead.js │ ├── index.js │ ├── setupTests.js │ └── storageUtils.js └── yarn.lock ├── migrations ├── README ├── alembic.ini ├── env.py ├── script.py.mako └── versions │ ├── 
20150219_add_column_entry_id.py │ ├── 20150225_remove_email_notification_column.py │ ├── 20150304_adding_feed_and_user_attributes_for_.py │ ├── 20150310_changed_the_type_of_the_column_last_.py │ ├── 20150518_adding_filters_field.py │ ├── 20150703_add_icon_column.py │ ├── 20150803_mv_icons_from_feed_tbl_to_icon_tbl.py │ ├── 20150911_adding_category.py │ ├── 20180809_add_article_valuable_tokens.py │ ├── 20180809_add_lang_column.py │ ├── 20180809_add_more_reasons.py │ ├── 20180809_add_user_timezone.py │ ├── 20180809_adding_comment_support.py │ ├── 20180809_adding_reasons_feed.py │ ├── 20180809_clustering.py │ ├── 20180809_drop_tag_table.py │ ├── 20180809_enhancing_comparison_style.py │ ├── 20180809_expires.py │ ├── 20180809_index_handling.py │ ├── 20180809_new_users.py │ ├── 20180809_oauth_linuxfr.py │ ├── 20180809_readability_integration.py │ ├── 20180809_reddit_inte_bool.py │ ├── 20180809_tag_handling.py │ ├── 20180830_cluster_control.py │ ├── 20180830_drop_old_indexes.py │ ├── 20180830_new_article_content_management.py │ ├── 20200503_typing_feed_and_articles.py │ ├── 20200505_add_cluster_read_reason_and_remove_feed_cache_col.py │ ├── 20200518_truncated_content.py │ ├── 20200603_improving_main_listing.py │ ├── 20201108_drop_readability_leftovers.py │ ├── 20210403_add_more_article_type.py │ └── 20220524_adding_feed_unread_count.py ├── tests ├── __init__.py ├── api │ ├── __init__.py │ ├── auth_test.py │ ├── category_test.py │ ├── cluster_test.py │ ├── feed_test.py │ ├── oauth_test.py │ ├── one_page_app_test.py │ ├── opml_test.py │ └── user_test.py ├── base.py ├── controllers │ ├── __init__.py │ ├── article_test.py │ ├── category_test.py │ ├── cluster_control_test.py │ ├── cluster_test.py │ ├── feed_test.py │ └── user_test.py ├── crawler │ ├── __init__.py │ ├── article_builder_test.py │ └── main_test.py ├── crawler_test.py ├── fixtures │ ├── __init__.py │ ├── article-2.html │ ├── article-2.json │ ├── article.html │ ├── article.json │ ├── entry-with-enclosure.json │ ├── 
example.feed.atom │ ├── feed.json │ ├── filler.py │ ├── img_tweet.json │ └── link_tweet.json ├── libs │ ├── __init__.py │ ├── article_cleaner_test.py │ ├── article_utils_test.py │ ├── content_generator.py │ ├── feed_utils_test.py │ ├── headers_handling_test.py │ ├── html_parsing_test.py │ ├── koreus_inte_test.py │ ├── reddit_inte_test.py │ └── rss_bridg_inte_test.py ├── model_test.py └── utils.py └── wsgi.py /.gitignore: -------------------------------------------------------------------------------- 1 | # use glob syntax 2 | syntax: glob 3 | 4 | *.elc 5 | *.pyc 6 | *~ 7 | *.db 8 | 9 | # Emacs 10 | eproject.cfg 11 | 12 | # Temporary files (vim backups) 13 | *.swp 14 | 15 | # Log files: 16 | *.log 17 | 18 | # Vagrant: 19 | .vagrant/ 20 | 21 | # Virtualenv 22 | venv 23 | build 24 | 25 | .coverage 26 | 27 | # node files 28 | node_modules 29 | 30 | # docker compose 31 | .jarr-data/ 32 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jaesivsm/JARR/b7cafda5f813868ba8e2fb41136241407cd83dfa/.gitmodules -------------------------------------------------------------------------------- /.mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | plugins = sqlalchemy.ext.mypy.plugin 3 | -------------------------------------------------------------------------------- /Dockerfiles/circleci-env.yml: -------------------------------------------------------------------------------- 1 | services: 2 | postgres: 3 | hostname: postgres 4 | image: postgres:16 5 | ports: 6 | - 0.0.0.0:5432:5432 7 | environment: 8 | - POSTGRES_PASSWORD=password 9 | redis: 10 | hostname: redis 11 | image: redis 12 | ports: 13 | - 0.0.0.0:6379:6379 14 | server: 15 | hostname: server 16 | container_name: server 17 | image: jarr-server:latest 18 | volumes: 19 | - 
"/home/circleci/project/Makefile:/jarr/Makefile" 20 | - "/home/circleci/project/tests:/jarr/tests" 21 | - "/home/circleci/project/example_conf:/etc/jarr" 22 | - "/home/circleci/coverage_results:/jarr/coverage_results:rw" 23 | -------------------------------------------------------------------------------- /Dockerfiles/dev-env.yml: -------------------------------------------------------------------------------- 1 | services: 2 | redis: 3 | image: redis 4 | ports: 5 | - 0.0.0.0:6379:6379 6 | rabbitmq: 7 | image: rabbitmq 8 | ports: 9 | - 0.0.0.0:25672:25672 10 | - 0.0.0.0:4369:4369 11 | - 0.0.0.0:5671:5671 12 | - 0.0.0.0:5672:5672 13 | postgres: 14 | image: postgres:16 15 | environment: 16 | - POSTGRES_PASSWORD=password 17 | - POSTGRES_HOST_AUTH_METHOD=trust 18 | ports: 19 | - 0.0.0.0:5432:5432 20 | volumes: 21 | - ../.jarr-data-16:/var/lib/postgresql/data 22 | command: 23 | - "postgres" 24 | - "-c" 25 | - "log_min_duration_statement=100" 26 | 27 | volumes: 28 | db-data: 29 | -------------------------------------------------------------------------------- /Dockerfiles/front: -------------------------------------------------------------------------------- 1 | FROM node:14.20.1 as build-deps 2 | ARG PUBLIC_URL= 3 | ARG REACT_APP_API_URL=0.0.0.0:8000 4 | WORKDIR /jarr 5 | RUN yarn global add serve 6 | COPY jsclient/package.json jsclient/yarn.lock ./ 7 | COPY jsclient/src/ /jarr/src 8 | COPY jsclient/public/ /jarr/public 9 | RUN yarn \ 10 | && yarn build \ 11 | && rm -rf src public node_modules package.json yarn.lock 12 | EXPOSE 80 13 | CMD ["serve", "-p", "80", "-s", "/jarr/build"] 14 | -------------------------------------------------------------------------------- /Dockerfiles/prod-example.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | services: 3 | jarr-server: 4 | build: 5 | context: ../. 
6 | dockerfile: ./Dockerfiles/server 7 | ports: 8 | - "127.0.0.1:8500:8000" 9 | # You'll a folder with at least jarr.json, logging.ini and gunicorn.py 10 | volumes: 11 | - ../example_conf/:/etc/jarr:ro 12 | depends_on: 13 | - postgres 14 | - redis 15 | - rabbitmq 16 | networks: 17 | jarr: 18 | jarr-front: 19 | build: 20 | context: ../. 21 | dockerfile: ./Dockerfiles/front 22 | args: 23 | PUBLIC_URL: http://localhost:8501/ 24 | REACT_APP_API_URL: http://localhost:8500/ 25 | ports: 26 | - "127.0.0.1:8501:80" 27 | depends_on: 28 | - jarr-server 29 | networks: 30 | - jarr 31 | jarr-worker: 32 | build: 33 | context: ../. 34 | dockerfile: ./Dockerfiles/server 35 | # You'll a folder with at least jarr.json, logging.ini and gunicorn.py 36 | volumes: 37 | - ../example_conf/:/etc/jarr:ro 38 | depends_on: 39 | - postgres 40 | - redis 41 | - rabbitmq 42 | networks: 43 | - jarr 44 | rabbitmq: 45 | container_name: rabbitmq 46 | hostname: rabbitmq 47 | image: rabbitmq 48 | networks: 49 | - jarr 50 | redis: 51 | container_name: redis 52 | hostname: redis 53 | image: redis 54 | networks: 55 | - jarr 56 | postgres: 57 | container_name: postgres 58 | hostname: postgres 59 | image: postgres:15 60 | ports: 61 | - 127.0.0.1:5432:5432 62 | networks: 63 | - jarr 64 | # remove that on real prod env and set proper db password 65 | environment: 66 | - POSTGRES_PASSWORD=password 67 | - POSTGRES_HOST_AUTH_METHOD=trust 68 | # Directory in which your db will save its data, change it to some absolute path 69 | # volumes: 70 | # - ../.jarr-data:/var/lib/postgresql/data 71 | 72 | networks: 73 | jarr: 74 | -------------------------------------------------------------------------------- /Dockerfiles/pythonbase: -------------------------------------------------------------------------------- 1 | FROM python:3.12-slim 2 | 3 | RUN useradd jarr --create-home --shell /bin/bash --home-dir /jarr --user-group 4 | COPY example_conf/jarr.json /etc/jarr/jarr.json 5 | WORKDIR /jarr 6 | RUN apt-get update \ 7 | 
&& apt-get install --yes --no-install-recommends \ 8 | gcc \ 9 | python3-dev \ 10 | libxml2-dev \ 11 | postgresql-server-dev-all \ 12 | && pip install --no-cache-dir --upgrade pip pipenv \ 13 | && apt clean --yes \ 14 | && apt autoremove --yes \ 15 | && rm -rf /var/log/apt /var/cache/apt /var/log/apt 16 | USER jarr 17 | COPY ./Pipfil* /jarr/ 18 | RUN mkdir /tmp/pipenv \ 19 | && PIPENV_NOSPIN=true \ 20 | PIPENV_COLORBLIND=true \ 21 | PIPENV_HIDE_EMOJIS=true \ 22 | PIPENV_CACHE_DIR=/tmp/pipenv pipenv sync \ 23 | && rm -rf /tmp/pipenv 24 | COPY ./migrations/ /jarr/migrations 25 | COPY ./*.py /jarr/ 26 | COPY ./jarr/ /jarr/jarr 27 | 28 | VOLUME /etc/jarr/ 29 | -------------------------------------------------------------------------------- /Dockerfiles/server: -------------------------------------------------------------------------------- 1 | FROM jarr-base:latest 2 | 3 | COPY example_conf/logging.ini /etc/jarr/logging.ini 4 | COPY example_conf/gunicorn.py /etc/jarr/gunicorn.py 5 | EXPOSE 8000 6 | CMD ["pipenv", "run", "gunicorn", "-c", "/etc/jarr/gunicorn.py", "--log-config", "/etc/jarr/logging.ini", "-b", "0.0.0.0:8000", "wsgi:application"] 7 | -------------------------------------------------------------------------------- /Dockerfiles/worker: -------------------------------------------------------------------------------- 1 | FROM jarr-base:latest 2 | EXPOSE 8001 3 | CMD ["pipenv", "run", "celery", "--app", "ep_celery.celery_app", "worker"] 4 | -------------------------------------------------------------------------------- /INSTALL.md: -------------------------------------------------------------------------------- 1 | ## Installation procedure 2 | 3 | It is advised to use the Docker images `jaesivsm/jarr-server` and `jaesivsm/jarr-worker`. However, the `jaesivsm/jarr-front` image is built with the address of the production server as its API URL. You'll then need to rebuild it. 
4 | 5 | You can take inspiration from [`Dockerfiles/prod-example.yml`](https://github.com/jaesivsm/JARR/blob/master/Dockerfiles/prod-example.yml) which will provide you with a working instance of JARR. 6 | 7 | ### Prerequisites 8 | 9 | To run JARR you'll need at least [Docker](https://docs.docker.com/get-docker/) installed. 10 | 11 | You'll also need [pipenv](https://github.com/pypa/pipenv#installation) even if there are undocumented ways around that. 12 | 13 | ## Running the code 14 | 15 | ```shell 16 | # first clone the repository 17 | git clone https://github.com/jaesivsm/JARR.git 18 | 19 | # then copy Dockerfiles/prod-example.yml and edit it to your liking 20 | # for the documentation here, we'll say that you use the example one 21 | 22 | # first get inside the cloned repo 23 | cd JARR 24 | 25 | # ensure you have the proper packages installed 26 | pipenv sync --dev 27 | 28 | # Build base image for server and workers 29 | make build-base 30 | 31 | # bring up env 32 | # /!\ You'll want to edit the example proposed below : 33 | # * have your own configuration files in a directory of your own 34 | # * specify a proper directory for postgres to store your data in 35 | make start-env COMPOSE_FILE=Dockerfiles/prod-example.yml 36 | 37 | # create database inside the running postgres 38 | # /!\ this command is for test purposes and creates the db and user without a password 39 | # you'll want to change that in production 40 | make db-bootstrap-user COMPOSE_FILE=Dockerfiles/prod-example.yml 41 | make db-bootstrap-tables COMPOSE_FILE=Dockerfiles/prod-example.yml 42 | 43 | # create JARR tables 44 | # /!\ this step will fail if you leave the default value for Database address, 45 | # you'll want to replace it with "postgresql" in the jarr.json configuration file 46 | make init-env-docker COMPOSE_FILE=Dockerfiles/prod-example.yml 47 | ``` 48 | 49 | Once finished, you may access jarr on `http://localhost:8501/` and log in with `admin:admin`. 
50 | 51 | ## Maintenance 52 | 53 | Some tasks will require you to run commands on either the server or the worker : 54 | 55 | #### Starting the scheduler 56 | 57 | To start the scheduler that'll run all background processing, run : 58 | 59 | ``` 60 | docker-compose --file Dockerfiles/prod-example.yml exec jarr-worker pipenv run python3 61 | ``` 62 | 63 | Then in the python terminal 64 | 65 | ```python 66 | from jarr.crawler.main import scheduler 67 | scheduler.apply_async() 68 | ``` 69 | 70 | #### Executing Schema and Data migration 71 | 72 | ``` 73 | docker-compose --file Dockerfiles/prod-example.yml exec jarr-worker flask db upgrade 74 | ``` 75 | -------------------------------------------------------------------------------- /Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [dev-packages] 7 | pycodestyle = "*" 8 | pylint = "*" 9 | flake8 = "*" 10 | coverage = "*" 11 | Flask-Testing = "*" 12 | isort = "*" 13 | ipdb = "*" 14 | ipython = "*" 15 | mypy = "*" 16 | types-redis = "*" 17 | types-requests = "*" 18 | types-python-dateutil = "*" 19 | types-mock = "*" 20 | SQLAlchemy = {extras = ["mypy"]} 21 | black = "*" 22 | pytest = "*" 23 | pytest-cov = "*" 24 | 25 | [packages] 26 | advocate = "==1.*" 27 | alembic = "==1.*" 28 | "beautifulsoup4" = "==4.*" 29 | blinker = "==1.*" 30 | celery = "==5.*" 31 | feedparser = "==6.*" 32 | flask = "==3.*" 33 | flask-cors = "*" 34 | flask-jwt-extended = "==4.*" 35 | flask-migrate = "==4.*" 36 | flask-restx = "==1.*" 37 | goose3 = "==3.*" 38 | gunicorn = "==22.*" 39 | json-logging-py = "*" # needed by gunicorn config 40 | opml = "==0.*" 41 | prometheus-distributed-client = "==1.*" 42 | psycopg2-binary = "==2.*" 43 | python-dateutil = "*" 44 | rauth = "==0.7.3" 45 | redis = "==5.*" 46 | requests = "==2.*" 47 | SQLAlchemy = "==2.*" 48 | the-conf = "==0.*" 49 | 50 | [requires] 51 | python_version 
= "3.12" 52 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # JARR 2 | 3 | [![CircleCI](https://circleci.com/gh/jaesivsm/JARR.svg?style=shield)](https://circleci.com/gh/jaesivsm/JARR) 4 | [![Codacy Badge](https://api.codacy.com/project/badge/Grade/8b81ef446c4849939796c4965f121ffe)](https://www.codacy.com/manual/jaesivsm/JARR?utm_source=github.com&utm_medium=referral&utm_content=jaesivsm/JARR&utm_campaign=Badge_Grade) 5 | [![Coverage](https://api.codacy.com/project/badge/Coverage/8b81ef446c4849939796c4965f121ffe)](https://www.codacy.com/manual/jaesivsm/JARR?utm_source=github.com&utm_medium=referral&utm_content=jaesivsm/JARR&utm_campaign=Badge_Coverage) 6 | 7 | ## Presentation 8 | 9 | JARR (which stands for Just Another RSS Reader) is a web-based news aggregator and reader. 10 | 11 | JARR is under ongoing development and functionalities are regularly added. 12 | For past and future updates see the [milestones](https://github.com/jaesivsm/JARR/milestones). 13 | 14 | The particularity of this project is to allow for article clustering either on links or on content through TF-IDF. 15 | 16 | ## Official instance 17 | 18 | You can use the instance of JARR run by the maintainer on [app.jarr.info](https://app.jarr.info) or try out the API at [api.jarr.info](https://api.jarr.info). 19 | 20 | ## Installation 21 | 22 | To use and host your own instance of JARR please see the [installation instructions](https://github.com/jaesivsm/JARR/blob/master/INSTALL.md). 23 | 24 | ## Security 25 | 26 | Please refer to the [security instructions](https://github.com/jaesivsm/JARR/blob/master/SECURITY.md). 27 | 28 | ## License 29 | 30 | JARR is under the [GNU Affero General Public License version 3](https://www.gnu.org/licenses/agpl-3.0.html). 
31 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Reporting security issues 2 | 3 | For common issues the recommended way is to open an [issue](https://github.com/jaesivsm/JARR/issues/new). 4 | 5 | For security issues related to either JARR itself or its [main instance](https://app.jarr.info), the best way is to contact [the main maintainer by mail](mailto:contact@jarr.info). 6 | -------------------------------------------------------------------------------- /ep_celery.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | from celery import Celery, signals 4 | 5 | from jarr.bootstrap import commit_pending_sql, conf, rollback_pending_sql 6 | 7 | celery_app = Celery(broker=conf.celery.broker_url, 8 | config_source=conf.celery) 9 | signals.task_success.connect(commit_pending_sql) 10 | signals.task_failure.connect(rollback_pending_sql) 11 | -------------------------------------------------------------------------------- /example_conf/gunicorn.py: -------------------------------------------------------------------------------- 1 | import multiprocessing 2 | 3 | workers = multiprocessing.cpu_count() + 1 4 | -------------------------------------------------------------------------------- /example_conf/jarr.circleci.json: -------------------------------------------------------------------------------- 1 | { 2 | "jarr_testing": true, 3 | "crawler": { 4 | "login": "admin", 5 | "passwd": "admin" 6 | }, 7 | "clustering": { 8 | "tfidf": { 9 | "min_vector_size": 2 10 | } 11 | }, 12 | "log": { 13 | "level": 0 14 | }, 15 | "db": { 16 | "pg_uri": "postgresql://postgres:password@postgres:5432/jarr", 17 | "redis": {"host": "redis"} 18 | }, 19 | "celery": {"broker": "amqp://0.0.0.0//", 20 | "backend": "redis://0.0.0.0:6379/0", 21 | "BROKER_URL": 
"amqp://0.0.0.0//"}, 22 | "auth": {"secret_key": "my not so secret key"} 23 | } 24 | -------------------------------------------------------------------------------- /example_conf/jarr.json: -------------------------------------------------------------------------------- 1 | { 2 | "jarr_testing": false, 3 | "crawler": { 4 | "login": "admin", 5 | "passwd": "admin" 6 | }, 7 | "log": { 8 | "level": 10 9 | }, 10 | "db": { 11 | "pg_uri": "postgresql://jarr@postgres:5432/jarr", 12 | "redis": {"host": "redis"}, 13 | "metrics": {"host": "redis"} 14 | }, 15 | "celery": {"broker": "amqp://rabbitmq//", 16 | "backend": "redis://redis:6379/0", 17 | "broker_url": "amqp://rabbitmq//"}, 18 | "auth": {"secret_key": "my not so secret key"} 19 | } 20 | -------------------------------------------------------------------------------- /example_conf/jarr.test.json: -------------------------------------------------------------------------------- 1 | { 2 | "jarr_testing": true, 3 | "crawler": { 4 | "login": "admin", 5 | "passwd": "admin" 6 | }, 7 | "clustering": { 8 | "tfidf": { 9 | "min_vector_size": 2 10 | } 11 | }, 12 | "log": { 13 | "level": 0 14 | }, 15 | "db": { 16 | "pg_uri": "postgresql://jarr_test@0.0.0.0:5432/jarr_test", 17 | "redis": {"host": "0.0.0.0"}, 18 | "metrics": {"host": "0.0.0.0"} 19 | }, 20 | "celery": {"broker": "amqp://0.0.0.0//", 21 | "backend": "redis://0.0.0.0:6379/0", 22 | "BROKER_URL": "amqp://0.0.0.0//"}, 23 | "auth": {"secret_key": "my not so secret key"} 24 | } 25 | -------------------------------------------------------------------------------- /example_conf/logging.ini: -------------------------------------------------------------------------------- 1 | [loggers] 2 | keys=root, gunicorn.error 3 | 4 | [handlers] 5 | keys=console 6 | 7 | [formatters] 8 | keys=json 9 | 10 | [logger_root] 11 | level=INFO 12 | handlers=console 13 | 14 | [logger_gunicorn.error] 15 | level=ERROR 16 | handlers=console 17 | propagate=0 18 | qualname=gunicorn.error 19 | 20 | 
def set_clustering_options(level, model, parser, nullable=True):
    """Declare every clustering flag on both ``model`` and ``parser``.

    Each entry of the module-level ``clustering_options`` mapping is
    registered through ``set_model_n_parser`` as a ``bool`` option whose
    description is the option's text followed by a level-specific suffix.

    :param level: one of ``"user"``, ``"category"`` or ``"feed"``; selects
        the suffix appended to every description.
    :param model: passed through to ``set_model_n_parser``.
    :param parser: passed through to ``set_model_n_parser``.
    :param nullable: forwarded as the ``nullable`` keyword of each option.
    :raises ValueError: if ``level`` is not one of the supported values.
    """
    suffixes = {
        "user":
            " (article's feed and category clustering settings allows it)",
        "category":
            " (article's feed and user clustering settings allows it)",
        "feed":
            " (article's category and user clustering settings allows it)",
    }
    # Fail early and explicitly: the former if/elif chain left ``suffix``
    # unbound for any other level and crashed with UnboundLocalError later.
    try:
        suffix = suffixes[level]
    except KeyError:
        raise ValueError(
            "unknown clustering level %r, expected one of %s"
            % (level, ", ".join(sorted(suffixes)))
        ) from None
    for option, description in clustering_options.items():
        set_model_n_parser(
            model,
            parser,
            option,
            bool,
            nullable=nullable,
            description=description + suffix,
        )
@metrics_ns.route('')
class Metric(Resource):
    """Resource mounted at the root of the ``metrics`` namespace."""

    @staticmethod
    def get():
        """Return the output of ``generate_latest(REGISTRY)`` as a response.

        The response mimetype is ``CONTENT_TYPE_LATEST``.
        """
        payload = generate_latest(REGISTRY)
        return Response(payload, mimetype=CONTENT_TYPE_LATEST)
def init_logging(log_path=None, log_level=logging.INFO, modules=(),
                 log_format='%(asctime)s %(levelname)s %(message)s'):
    """Attach one shared handler to a set of loggers and set their level.

    :param log_path: when truthy, log to that file through a
        ``logging.FileHandler``; otherwise a ``logging.StreamHandler``
        is used.
    :param log_level: level applied to each logger and to every handler
        attached to it (including handlers that were already present).
    :param modules: logger names to configure; when empty, defaults to
        ``'root', 'wsgi', 'manager', 'jarr'``.
    :param log_format: format string handed to ``logging.Formatter``.
    """
    if not modules:
        modules = 'root', 'wsgi', 'manager', 'jarr'
    if log_path:
        handler = logging.FileHandler(log_path)
    else:
        handler = logging.StreamHandler()
    handler.setFormatter(logging.Formatter(log_format))
    for logger_name in modules:
        logger = logging.getLogger(logger_name)
        logger.addHandler(handler)
        # Distinct loop variable: reusing ``handler`` here would rebind the
        # shared handler that the next logger in ``modules`` must receive.
        for attached_handler in logger.handlers:
            attached_handler.setLevel(log_level)
        logger.setLevel(log_level)
class CategoryController(AbstractController):
    """Controller for ``Category`` objects."""
    _db_cls = Category

    def delete(self, obj_id, commit=True):
        """Delete a category after detaching the feeds that point to it.

        obj_id: id of the category to delete.
        commit: forwarded to the parent ``delete`` so that a caller asking
            for ``commit=False`` is honoured.
        Returns whatever the parent ``delete`` returns.
        """
        # Imported here rather than at module level, as in the original
        # (presumably to avoid a circular import — TODO confirm).
        from jarr.controllers import FeedController
        # The detach step is not committed on its own; the delete below
        # decides whether to commit.
        FeedController(self.user_id).update({'category_id': obj_id},
                                            {'category_id': None},
                                            commit=None)
        # NOTE(review): assumes AbstractController.delete accepts ``commit``
        # like the overrides visible in this package — confirm.
        return super().delete(obj_id, commit=commit)
class UserController(AbstractController):
    """Controller for ``User`` objects, hashing passwords on write."""
    _db_cls = User
    _user_id_key = "id"

    @staticmethod
    def _handle_password(attrs):
        """Hash ``attrs["password"]`` in place, or drop the key when empty.

        Mutates ``attrs``: a truthy password is replaced by its hash; a
        falsy one is removed so that it never overwrites a stored hash.
        """
        if attrs.get("password"):
            attrs["password"] = generate_password_hash(attrs["password"])
        elif "password" in attrs:
            del attrs["password"]

    def check_password(self, username, password):
        """Return the user whose login is ``username`` if ``password``
        matches the stored hash, ``None`` otherwise."""
        user = self.get(login=username)
        if check_password_hash(user.password, password):
            return user
        return None

    def create(self, **attrs):
        """Create a user, hashing the password first."""
        self._handle_password(attrs)
        return super().create(**attrs)

    def update(self, filters, attrs, return_objs=False, commit=True):
        """Update users matching ``filters``, hashing the password first."""
        self._handle_password(attrs)
        return super().update(filters, attrs, return_objs, commit)

    def delete(self, obj_id, commit=True):
        """Delete a user after unlinking its clusters and articles.

        obj_id: id of the user to delete.
        commit: forwarded to every step, so ``commit=False`` is no longer
            silently ignored.
        Returns whatever the parent ``delete`` returns.
        """
        from jarr.controllers import ArticleController, ClusterController
        fltr = {"user_id": obj_id}
        # Clear the cluster <-> article references before the delete.
        # NOTE(review): assumes both controllers' ``update`` accept
        # ``commit`` as a keyword, like the overrides in this package.
        ClusterController(self.user_id).update(
            fltr, {"main_article_id": None}, commit=commit)
        ArticleController(self.user_id).update(
            fltr, {"cluster_id": None}, commit=commit)
        return super().delete(obj_id, commit=commit)
-------------------------------------------------------------------------------- 1 | import html 2 | import logging 3 | from datetime import timezone 4 | 5 | import dateutil.parser 6 | 7 | from jarr.crawler.article_builders.abstract import AbstractArticleBuilder 8 | from jarr.lib.utils import utc_now 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | 13 | class JsonArticleBuilder(AbstractArticleBuilder): 14 | 15 | @staticmethod 16 | def extract_id(entry): 17 | return entry['id'] 18 | 19 | @staticmethod 20 | def extract_date(entry): 21 | published = entry.get('date_published') 22 | if published: 23 | return dateutil.parser.parse(published).astimezone(timezone.utc) 24 | return utc_now() 25 | 26 | @staticmethod 27 | def extract_title(entry): 28 | if entry.get('title'): 29 | return html.unescape(entry['title']) 30 | 31 | @staticmethod 32 | def extract_tags(entry): 33 | return set(entry.get('tags') or []) 34 | 35 | @staticmethod 36 | def extract_link(entry): 37 | return entry.get('external_url') or entry.get('url') 38 | 39 | @staticmethod 40 | def extract_content(entry): 41 | for content_key in 'content_html', 'content_text': 42 | if entry.get(content_key): 43 | return entry[content_key] 44 | return '' 45 | 46 | def extract_lang(self, entry): 47 | return entry.get('language') or self._top_level.get('language') 48 | 49 | @staticmethod 50 | def extract_comments(entry): 51 | return entry.get('url') or entry.get('external_url') 52 | 53 | def _all_articles(self): 54 | known_links = {self.article['link'], 55 | self.extract_link(self.entry), 56 | self.article['comments']} 57 | yield self.article 58 | for i, link in enumerate(self.entry.get('attachments') or []): 59 | try: 60 | content_type = link['mime_type'] 61 | title = link.get('title') 62 | link = link['url'] 63 | except (KeyError, TypeError): 64 | continue 65 | if link in known_links: 66 | continue 67 | known_links.add(link) 68 | enclosure = self.template_article() 69 | enclosure['order_in_cluster'] = i 70 | for key, 
class KoreusArticleBuilder(ClassicArticleBuilder):
    """Article builder for Koreus feeds.

    The article link is taken from the first anchor found in the entry's
    HTML summary/content; the entry's own ``link`` is used as the comments
    link.
    """

    @staticmethod
    def extract_link(entry):
        """Return the href of the first anchor in the entry's HTML text.

        The text is looked up in ``summary_detail.value``, then ``summary``,
        then the last non-empty ``content[].value``. When no text or no
        anchor with an ``href`` is found, the parent builder's link
        extraction is used instead.
        """
        text = None
        if entry.get('summary_detail') \
                and entry['summary_detail'].get('value'):
            text = entry['summary_detail']['value']
        elif entry.get('summary'):
            text = entry['summary']
        else:
            for content in entry.get('content') or []:
                if content and content.get('value'):
                    text = content['value']
        if text is not None:
            soup = BeautifulSoup(text, 'html.parser')
            # find() returns None rather than raising when nothing matches.
            anchor = soup.find('a', href=True)
            if anchor is not None:
                return anchor.attrs['href']
        # Zero-argument super() cannot work in a staticmethod (there is no
        # instance or class to bind), so the parent is called explicitly.
        # NOTE(review): assumes ClassicArticleBuilder.extract_link is also
        # a staticmethod taking only ``entry`` — confirm.
        return ClassicArticleBuilder.extract_link(entry)

    @staticmethod
    def extract_comments(entry):
        """Return the entry's own ``link`` as the comments URL."""
        return entry.get('link')
class RSSBridgeTwitterArticleBuilder(RSSBridgeArticleBuilder):
    """RSS-Bridge builder for tweets.

    When the tweet's HTML content holds an outgoing link, that link becomes
    the article link and the tweet URL becomes the comments link.
    """

    def _promote_last_tweet_link(self):
        """Swap ``link``/``comments`` on ``self.article`` when possible.

        Does nothing when the entry has no usable ``text/html`` content or
        when no suitable anchor is found.
        """
        try:
            first_content = self.entry['content'][0]
            content = first_content['value']
            content_type = first_content.get('type', 'text/html')
        except (KeyError, IndexError, TypeError, AttributeError):
            # no content, or content of an unexpected shape: nothing to swap
            return
        if content_type != 'text/html':
            return
        soup = BeautifulSoup(content, 'html.parser')
        og_link = self.article['link']
        og_comments = self.article.get('comments')
        try:  # trying to find the last link in the tweet
            all_links = [link for link in soup.find_all('a')
                         if not link.find_all('img')  # no image
                         # and no profile pic
                         and og_link not in link.attrs['href']]
            if all_links:
                self.article['comments'] = self.article['link']
                self.article['link'] = all_links[-1].attrs['href']
        except (KeyError, AttributeError, TypeError, IndexError):
            # restore the original values on any parsing trouble
            self.article['link'] = og_link
            self.article['comments'] = og_comments

    def enhance(self):
        """Swap the links when possible, then delegate to the parent.

        The parent's ``enhance`` is always iterated. An early ``return``
        here would end the generator and yield nothing for entries without
        HTML content.
        NOTE(review): confirm that dropping such entries was not intended.
        """
        self._promote_last_tweet_link()
        yield from super().enhance()
class JSONCrawler(AbstractCrawler):
    """Crawler for JSON feeds; articles are built by JsonArticleBuilder."""
    feed_type = FeedType.json
    article_builder = JsonArticleBuilder

    def parse_feed_response(self, response):
        """Decode the response as JSON and move ``items`` to ``entries``.

        Raises ``KeyError`` when the decoded document has no ``items`` key.
        """
        feed = response.json()
        entries = feed.pop('items')
        feed['entries'] = entries
        return feed
class RssBridgeAbstractCrawler(ClassicCrawler):
    """Base crawler fetching a feed through an RSS-Bridge instance.

    Subclasses set ``bridge`` (the bridge name sent in the query string)
    and ``feed_type``.
    """
    bridge: str
    bridge_format = "AtomFormat"
    article_builder = RSSBridgeArticleBuilder
    feed_type: Optional[FeedType] = None  # forcing this crawler to be ignored

    def request(self):
        """Fetch the bridge URL with the crawler timeout, user agent and
        the feed's prepared headers (``ssrf_protect`` disabled)."""
        return jarr_get(
            self.get_url(),
            timeout=conf.crawler.timeout,
            user_agent=conf.crawler.user_agent,
            headers=prepare_headers(self.feed),
            ssrf_protect=False,
        )

    def get_url(self):
        """Build the RSS-Bridge URL that serves this feed.

        Scheme, host and path come from ``conf.plugins.rss_bridge``; the
        query string carries the action, format, bridge name and the feed
        link.

        Raises ``ValueError`` when ``conf.plugins.rss_bridge`` is not set.
        Without this check the unset case fails on ``None.scheme`` with an
        opaque ``AttributeError``.
        """
        if not conf.plugins.rss_bridge:
            raise ValueError(
                "conf.plugins.rss_bridge must be set to crawl %r feeds"
                % self.bridge
            )
        split = urlsplit(conf.plugins.rss_bridge)

        query = {
            "action": "display",
            "format": self.bridge_format,
            "bridge": self.bridge,
            "u": self.feed.link,
        }

        return urlunsplit(
            SplitResult(
                scheme=split.scheme,
                netloc=split.netloc,
                path=split.path or "/",
                query=urlencode(query),
                fragment="",
            )
        )
def reach_in(
    entry, key: str, sub_key: Optional[str] = None
) -> Generator[str, None, None]:
    """Yield the truthy value(s) found under ``entry[key]``.

    - a string value is yielded as is (``sub_key`` must then be ``None``,
      ``"value"`` or ``"href"``);
    - for a list, ``sub_value[sub_key]`` is yielded for each dict item
      that holds a truthy value under ``sub_key``;
    - for a dict, ``value[sub_key]`` is yielded when truthy.

    Anything else yields nothing.

    Raises ``AssertionError`` (on iteration) when a string value is
    reached with an unsupported ``sub_key``. It is raised explicitly so
    the check still runs under ``python -O``.
    """
    value = entry.get(key)
    if isinstance(value, str):
        if sub_key not in {None, "value", "href"}:
            raise AssertionError(
                "shouldn't reach for anything else but "
                "'value' if landing on a string value"
            )
        yield value
    elif isinstance(value, list):
        for sub_value in value:
            if isinstance(sub_value, dict) and sub_value.get(sub_key):
                yield sub_value[sub_key]
    elif isinstance(value, dict):
        if value.get(sub_key):
            yield value[sub_key]
def _extract_max_age(headers, feed_info):
    """Set ``feed_info['expires']`` from the ``cache-control`` max-age.

    Does nothing when the header has no ``max-age``. Any failure while
    parsing is logged and ``feed_info`` is left untouched.
    """
    if 'max-age' not in headers.get('cache-control', ''):
        return
    try:
        found = MAX_AGE_RE.search(headers['cache-control'])
        seconds = int(found.group(1))
        feed_info['expires'] = utc_now() + timedelta(seconds=seconds)
    except Exception:
        logger.exception("something went wrong while parsing max-age")
def prepare_headers(feed):
    """Build the request headers used to fetch a known feed.

    Always sets ``User-Agent`` and ``Accept``. Adds ``If-Modified-Since``
    when the feed has a ``last_modified`` and ``If-None-Match`` when it
    has a server-provided etag; when either is present, ``A-IM: feed`` is
    added as well.
    """
    headers = {'User-Agent': conf.crawler.user_agent,
               'Accept': FEED_ACCEPT_HEADERS}
    if feed.last_modified:
        headers['If-Modified-Since'] = feed.last_modified
    # Locally generated etags are prefixed with 'jarr/' and mean nothing to
    # the server. Test the prefix rather than a substring, so a genuine
    # server etag that merely contains "jarr" is still sent.
    if feed.etag and not feed.etag.startswith('jarr/'):
        headers['If-None-Match'] = feed.etag
    if 'If-Modified-Since' in headers or 'If-None-Match' in headers:
        headers['A-IM'] = 'feed'
    logger.debug('%r %r - calculated headers %r', feed.id, feed.title, headers)
    return headers
def lock(prefix, expire=LOCK_EXPIRE):
    """Decorator factory running a one-argument function under a redis lock.

    The lock key is derived from ``prefix`` and the sha256 of
    ``str(args)``. When the key is already held the call is skipped and
    ``None`` is returned. The duration of every call is recorded through
    ``observe_worker_result_since`` with one result label: ``'ok'``,
    ``'skipped'`` or the class name of the raised exception.

    prefix: lock namespace, also used as the ``method`` metric label.
    expire: lifetime of the lock key in seconds.
    """
    def metawrapper(func):
        @wraps(func)
        def wrapper(args):
            start = datetime.now()
            key = str(args).encode('utf8')
            key = f"lock-{prefix}-{sha256(key).hexdigest()}"
            # Acquire and set the expiry in a single command, so a crash
            # between the two steps cannot leave a lock without a TTL.
            if not REDIS_CONN.set(key, 'locked', nx=True, ex=expire):
                observe_worker_result_since(start, prefix, 'skipped')
                return None
            try:
                result = func(args)
            except Exception as error:
                observe_worker_result_since(start, prefix,
                                            error.__class__.__name__)
                logger.debug('something wrong happen %r', error)
                raise
            else:
                # Record 'ok' on success only; doing it in ``finally``
                # would also count failed runs as 'ok'.
                observe_worker_result_since(start, prefix, 'ok')
                return result
            finally:
                REDIS_CONN.delete(key)
        return wrapper
    return metawrapper
https://raw.githubusercontent.com/jaesivsm/JARR/b7cafda5f813868ba8e2fb41136241407cd83dfa/jarr/lib/clustering_af/__init__.py -------------------------------------------------------------------------------- /jarr/lib/clustering_af/extra_stopwords.py: -------------------------------------------------------------------------------- 1 | common_stopwords = {'news'} 2 | 3 | extra_stopwords = { 4 | 'french': {'afp', 'flash', 'actual', 'actu', 'info', 5 | 'nouvel', 'dépech', 'brev', 'depech'}.union(common_stopwords), 6 | 'english': common_stopwords, 7 | 'german': common_stopwords, 8 | 'spanish': common_stopwords, 9 | } 10 | -------------------------------------------------------------------------------- /jarr/lib/clustering_af/grouper.py: -------------------------------------------------------------------------------- 1 | """ 2 | Group articles that talks about the same subject. 3 | If two articles, in the same category, have enough similar tokens, we assume 4 | that they talk about the same subject, and we group them in a meta-article 5 | """ 6 | from collections import OrderedDict 7 | from jarr.lib.clustering_af.vector import TFIDFVector 8 | from jarr.models.article import Article 9 | 10 | 11 | def get_cosine_similarity(left_vector: TFIDFVector, 12 | article: Article, 13 | term_frequencies: OrderedDict, 14 | corpus_size: int) -> float: 15 | """ 16 | For a given vector and an article will return their cosine similarity. 
def get_terms_frequencies(*articles):
    """
    Count, for each term, how many times it is met across the articles.

    Parameter
    ---------
    articles: objects exposing an iterable ``simple_vector`` of terms

    Return
    ------
    OrderedDict: {term: number of occurrences while iterating every
                  article's ``simple_vector``}, in first-seen order
    """
    frequencies = OrderedDict()
    for article in articles:
        for term in article.simple_vector:
            frequencies[term] = frequencies.get(term, 0) + 1
    return frequencies
def to_vector(extract=None, parsed=None):
    """Build a weighted tsvector SQL expression for an article.

    Title, content and tags are weighted 'A', 'B' and 'C' respectively and
    concatenated with ``||``. Values from ``extract`` (a dict) replace the
    title, tags and language read from ``parsed``; the content of
    ``extract`` is only used when ``parsed`` gave none.

    Returns ``None`` when neither source is given or no text is available.
    """
    if not (extract or parsed):
        return None
    title = tags = content = lang = None
    if parsed:
        title = parsed.title
        tags = ' '.join(parsed.tags)
        content = parsed.cleaned_text
        lang = get_postgres_lang(parsed.meta_lang)
    if extract:
        title = extract.get('title')
        tags = ' '.join(extract.get('tags') or [])
        lang = get_postgres_lang(extract.get('lang'))
        if not content and extract.get('content'):
            content = BeautifulSoup(extract['content'], 'html.parser').text
    weighted_texts = ((title, 'A'), (content, 'B'), (tags, 'C'))
    vectors = [func.setweight(func.to_tsvector(lang, text), weight)
               for text, weight in weighted_texts if text]
    if not vectors:
        return None
    statement = vectors[0]
    for vector in vectors[1:]:
        statement = statement.op('||')(vector)
    return statement
def send(to="", bcc="", subject="", plaintext=""):
    """Send a plain-text email through the configured SMTP server.

    to: recipient address, written to the ``To`` header.
    bcc: optional extra recipient. It is added to the SMTP envelope only,
        never to a header, so other recipients cannot see it.
        NOTE(review): assumed to be a single address string, like ``to``.
    subject: value of the ``Subject`` header.
    plaintext: body, sent as UTF-8 ``text/plain``.
    """
    # Create message container - the correct MIME type is multipart/alternative
    msg = MIMEMultipart('alternative')
    msg['Subject'] = subject
    msg['From'] = conf.notification.email
    msg['Date'] = rfc_1123_utc()
    msg['To'] = to
    msg.attach(MIMEText(plaintext, 'plain', 'utf-8'))

    recipients = [msg['To']]
    if bcc:
        recipients.append(bcc)

    with smtplib.SMTP(host=conf.notification.host,
                      port=conf.notification.port) as smtp:
        smtp.ehlo()
        if conf.notification.starttls:
            smtp.starttls()
            # the session must be re-identified once TLS is established
            smtp.ehlo()
        smtp.login(conf.notification.login, conf.notification.password)
        smtp.sendmail(conf.notification.email, recipients, msg.as_string())
soundcloud = 'soundcloud' 17 | reddit = 'reddit' 18 | koreus = 'koreus' 19 | twitter = 'twitter' 20 | 21 | 22 | class ArticleType(Enum): 23 | image = 'image' 24 | video = 'video' 25 | audio = 'audio' 26 | embedded = 'embedded' 27 | 28 | 29 | class ClusterReason(Enum): 30 | original = 'original' # the article is the cluster's original one 31 | # the article has the same name and share a suitable category 32 | title = 'title' 33 | # the article has the same link that the main one of the cluster 34 | link = 'link' 35 | tf_idf = 'tf_idf' # the article has been clustered through tf_idf 36 | 37 | 38 | class ReadReason(Enum): 39 | read = 'read' # the user has read the cluster 40 | consulted = 'consulted' # the user was redirected to the cluster 41 | marked = 'marked' # the user has marked the cluster as read 42 | # the cluster was marked as read along with others 43 | mass_marked = 'mass_marked' 44 | # the cluster was marked as read by a filter 45 | filtered = 'filtered' 46 | wake_up = 'wake_up' 47 | -------------------------------------------------------------------------------- /jarr/metrics.py: -------------------------------------------------------------------------------- 1 | from prometheus_client import CollectorRegistry 2 | from prometheus_distributed_client import Gauge, Counter, Histogram 3 | 4 | REGISTRY = CollectorRegistry() 5 | BUCKETS_3H = [3, 4, 5, 6, 9, 12, 18, 26, 38, 57, 85, 126, 189, 282, 423, 633, 6 | 949, 1423, 2134, 3201, 4801, 7200, 10798] 7 | BUCKETS_7D = [615, 922, 1383, 2073, 3109, 4663, 6994, 10490, 15734, 23600, 8 | 35399, 53098, 79646, 119468, 179201, 268801, 403200, 604798] 9 | 10 | 11 | def prom(metric_cls, *args, **kwargs): 12 | return metric_cls(*args, namespace='jarr', registry=REGISTRY, **kwargs) 13 | 14 | 15 | READ = prom(Counter, 'read', 'Read event', ['reason']) 16 | 17 | FEED_FETCH = prom(Counter, 'feed_fetch', 'Feed fetching event', 18 | ['feed_type', 'result']) 19 | 20 | FEED_LATENESS = prom(Histogram, 'feed_lateness', 21 | 
'observed delta time when fetching feed', 22 | ['feed_type'], buckets=BUCKETS_7D) 23 | 24 | FEED_EXPIRES = prom(Histogram, 'feed_expires', 25 | 'detlta time in second observed when setting expires', 26 | ['feed_type', 'method'], buckets=BUCKETS_7D) 27 | 28 | WORKER_BATCH = prom(Histogram, 'worker_batch', 'worker batch size', 29 | ['worker_type'], buckets=BUCKETS_3H[:-7]) 30 | 31 | TFIDF_SCORE = prom(Histogram, 'tfidf_score', 'TFIDF scores', ['feed_type'], 32 | buckets=[i / 100 for i in range(0, 100, 10)]) 33 | 34 | WORKER = prom(Histogram, 'worker_method', 'worker taken actions', 35 | ['method', 'result'], buckets=BUCKETS_3H) 36 | 37 | EVENT = prom(Counter, 'event', 'events', ['module', 'context', 'result']) 38 | 39 | ARTICLE_CREATION = prom(Counter, 'article_creation', 'Article Creation', 40 | ['read', 'read_reason', 'cluster']) 41 | 42 | SERVER = prom(Counter, 'server_method', 'HTTP method served', 43 | ['uri', 'method', 'result']) 44 | 45 | USER = prom(Gauge, 'users', 'User counts', ['status']) 46 | 47 | ARTICLES = prom(Gauge, 'articles', 'Article counts', ['status']) 48 | -------------------------------------------------------------------------------- /jarr/models/__init__.py: -------------------------------------------------------------------------------- 1 | """All the JARR's models.""" 2 | 3 | from .feed import Feed 4 | from .user import User 5 | from .article import Article 6 | from .icon import Icon 7 | from .category import Category 8 | from .cluster import Cluster 9 | 10 | __all__ = ['Feed', 'User', 'Article', 'Icon', 'Category', 'Cluster'] 11 | -------------------------------------------------------------------------------- /jarr/models/category.py: -------------------------------------------------------------------------------- 1 | from jarr.bootstrap import Base 2 | from sqlalchemy import (Boolean, Column, ForeignKeyConstraint, Index, Integer, 3 | PickleType, String) 4 | from sqlalchemy.orm import relationship, validates 5 | 6 | 7 | class 
Category(Base): # type: ignore 8 | __tablename__ = 'category' 9 | 10 | id = Column(Integer, primary_key=True) 11 | name = Column(String) 12 | 13 | # clustering control 14 | cluster_enabled = Column(Boolean, default=None, nullable=True) 15 | cluster_tfidf_enabled = Column(Boolean, default=None, nullable=True) 16 | cluster_same_category = Column(Boolean, default=None, nullable=True) 17 | cluster_same_feed = Column(Boolean, default=None, nullable=True) 18 | cluster_wake_up = Column(Boolean, default=None, nullable=True) 19 | cluster_conf = Column(PickleType, default={}) 20 | 21 | # foreign keys 22 | user_id = Column(Integer, nullable=False) 23 | 24 | # relationships 25 | user = relationship( 26 | 'User', back_populates='categories', uselist=False) 27 | feeds = relationship( 28 | 'Feed', back_populates='category', 29 | cascade='all,delete-orphan', uselist=False) 30 | articles = relationship( 31 | 'Article', back_populates='category', cascade='all,delete-orphan') 32 | clusters = relationship( 33 | 'Cluster', back_populates='categories', 34 | foreign_keys='[Article.category_id, Article.cluster_id]', 35 | secondary='article', overlaps="articles,category,cluster,clusters") 36 | 37 | __table_args__ = ( 38 | ForeignKeyConstraint([user_id], ['user.id'], 39 | ondelete='CASCADE'), 40 | Index('ix_category_uid', user_id), 41 | ) 42 | 43 | @validates("name") 44 | def string_cleaning(self, key, value): 45 | return str(value if value is not None else '').strip() 46 | 47 | def __repr__(self): 48 | return f"" 49 | -------------------------------------------------------------------------------- /jarr/models/cluster.py: -------------------------------------------------------------------------------- 1 | from jarr.bootstrap import Base 2 | from jarr.lib.enums import ReadReason 3 | from jarr.lib.utils import utc_now 4 | from jarr.models.article import Article 5 | from jarr.models.utc_datetime_type import UTCDateTime 6 | from sqlalchemy import (Boolean, Column, Enum, ForeignKey, 7 | 
ForeignKeyConstraint, Index, Integer, PickleType, 8 | String) 9 | from sqlalchemy.orm import relationship 10 | 11 | 12 | class Cluster(Base): # type: ignore 13 | "Represent a cluster of articles from one or several feeds" 14 | __tablename__ = "cluster" 15 | 16 | id = Column(Integer, primary_key=True) 17 | cluster_type = Column(String) 18 | read = Column(Boolean, default=False) 19 | liked = Column(Boolean, default=False) 20 | created_date = Column(UTCDateTime, default=utc_now) 21 | content = Column(PickleType, default={}) 22 | 23 | # denorm 24 | main_date = Column(UTCDateTime, default=utc_now) 25 | main_feed_title = Column(String) 26 | main_title = Column(String) 27 | main_link = Column(String, default=None) 28 | 29 | # reasons 30 | read_reason = Column(Enum(ReadReason), default=None) # type: ignore 31 | 32 | # foreign keys 33 | user_id = Column(Integer, nullable=False) 34 | main_article_id = Column( 35 | Integer, ForeignKey("article.id", name="fk_article_id", use_alter=True) 36 | ) 37 | 38 | # relationships 39 | user = relationship("User", back_populates="clusters") 40 | main_article = relationship( 41 | Article, uselist=False, foreign_keys=main_article_id 42 | ) 43 | articles = relationship( 44 | Article, 45 | back_populates="cluster", 46 | foreign_keys=[Article.cluster_id], 47 | order_by=Article.date.asc(), 48 | ) 49 | feeds = relationship( 50 | "Feed", 51 | back_populates="clusters", 52 | secondary="article", 53 | foreign_keys=[Article.feed_id, Article.cluster_id], 54 | overlaps="articles,clusters,cluster,feed", 55 | ) 56 | categories = relationship( 57 | "Category", 58 | back_populates="clusters", 59 | secondary="article", 60 | foreign_keys=[Article.cluster_id, Article.category_id], 61 | overlaps="articles,category,cluster,clusters,feeds", 62 | ) 63 | 64 | __table_args__ = ( 65 | ForeignKeyConstraint([user_id], ["user.id"], ondelete="CASCADE"), 66 | Index("ix_cluster_uid_date", user_id, main_date.desc().nullslast()), 67 | Index("ix_cluster_liked_uid", liked, 
user_id), 68 | Index("ix_cluster_read_uid", read, user_id), 69 | # used by cluster deletion in FeedController.delete 70 | Index("ix_cluster_uid_martid", user_id, main_article_id.nullsfirst()), 71 | # triggered by article.ondelete 72 | Index("ix_cluster_martid", main_article_id.nullslast()), 73 | ) 74 | 75 | @property 76 | def categories_id(self): 77 | return {category.id for category in self.categories} 78 | 79 | @property 80 | def feeds_id(self): 81 | return {feed.id for feed in self.feeds} 82 | 83 | @property 84 | def icons_url(self): 85 | return {feed.icon_url for feed in self.feeds} 86 | 87 | def __repr__(self): 88 | return ( 89 | f"" 91 | ) 92 | -------------------------------------------------------------------------------- /jarr/models/icon.py: -------------------------------------------------------------------------------- 1 | from jarr.bootstrap import Base 2 | from sqlalchemy import Column, String 3 | from sqlalchemy.orm import relationship 4 | 5 | 6 | class Icon(Base): # type: ignore 7 | __tablename__ = "icon" 8 | 9 | url = Column(String, primary_key=True) 10 | content = Column(String, default=None) 11 | mimetype = Column(String, default="application/image") 12 | 13 | # relationships 14 | feeds = relationship("Feed", backref="icon") 15 | -------------------------------------------------------------------------------- /jarr/models/user.py: -------------------------------------------------------------------------------- 1 | import re 2 | from sqlalchemy import Boolean, Column, Integer, PickleType, String 3 | from sqlalchemy.orm import relationship, validates 4 | 5 | from jarr.bootstrap import Base, conf 6 | from jarr.lib.utils import utc_now 7 | from jarr.models.utc_datetime_type import UTCDateTime 8 | 9 | 10 | class User(Base): # type: ignore 11 | __tablename__ = 'user' 12 | 13 | id = Column(Integer, primary_key=True) 14 | login = Column(String, unique=True) 15 | password = Column(String) 16 | email = Column(String(254)) 17 | date_created = 
Column(UTCDateTime, default=utc_now) 18 | last_connection = Column(UTCDateTime, default=utc_now) 19 | renew_password_token = Column(String, default='') 20 | 21 | timezone = Column(String, default=conf.timezone) 22 | 23 | # clustering control 24 | cluster_enabled = Column(Boolean, default=True, nullable=False) 25 | cluster_tfidf_enabled = Column(Boolean, default=True, nullable=False) 26 | cluster_same_category = Column(Boolean, default=True, nullable=False) 27 | cluster_same_feed = Column(Boolean, default=True, nullable=False) 28 | cluster_wake_up = Column(Boolean, default=True, nullable=False) 29 | cluster_conf = Column(PickleType, default={}) 30 | 31 | # user rights 32 | is_active = Column(Boolean, default=True) 33 | is_admin = Column(Boolean, default=False) 34 | is_api = Column(Boolean, default=False) 35 | 36 | # oauth identites 37 | google_identity = Column(String) 38 | twitter_identity = Column(String) 39 | facebook_identity = Column(String) 40 | linuxfr_identity = Column(String) 41 | 42 | # relationships 43 | categories = relationship( 44 | 'Category', back_populates='user', cascade='all, delete-orphan', 45 | foreign_keys='[Category.user_id]') 46 | feeds = relationship( 47 | 'Feed', back_populates='user', cascade='all, delete-orphan', 48 | foreign_keys='[Feed.user_id]') 49 | articles = relationship( 50 | 'Article', back_populates='user', cascade='all, delete-orphan', 51 | foreign_keys='[Article.user_id]') 52 | clusters = relationship( 53 | 'Cluster', back_populates='user', cascade='all, delete-orphan', 54 | foreign_keys='[Cluster.user_id]') 55 | 56 | @validates('login') 57 | def string_cleaning(self, key, value): 58 | return re.sub(r'[^a-zA-Z0-9_\.]', '', value.strip()) 59 | 60 | def __repr__(self): 61 | """Represents a user with its id.""" 62 | return f"" 63 | -------------------------------------------------------------------------------- /jarr/models/utc_datetime_type.py: -------------------------------------------------------------------------------- 1 | 
@event.connect
def log(sender, level=logging.DEBUG, **kwargs):
    """Write a received event to the logger named after its module.

    :param sender: object that emitted the signal (unused)
    :param level: logging level to emit the record at; defaults to
        ``logging.DEBUG``
    :param kwargs: must contain the ``module``, ``context`` and ``result``
        keys, which make up the logged line
    :raises KeyError: if one of the three expected keys is missing
    """
    logger = logging.getLogger(kwargs['module'])
    # honour the level requested by the emitter instead of forcing INFO
    logger.log(level, "%s > %r: %s",
               kwargs['module'], kwargs["context"], kwargs["result"])
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/jaesivsm/JARR/b7cafda5f813868ba8e2fb41136241407cd83dfa/jarr/static/img/pinboard.png -------------------------------------------------------------------------------- /jarr/static/img/readability.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jaesivsm/JARR/b7cafda5f813868ba8e2fb41136241407cd83dfa/jarr/static/img/readability.png -------------------------------------------------------------------------------- /jarr/static/img/reddit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jaesivsm/JARR/b7cafda5f813868ba8e2fb41136241407cd83dfa/jarr/static/img/reddit.png -------------------------------------------------------------------------------- /jarr/static/img/twitter.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jaesivsm/JARR/b7cafda5f813868ba8e2fb41136241407cd83dfa/jarr/static/img/twitter.png -------------------------------------------------------------------------------- /jarr/templates/mail_password_recovery.txt: -------------------------------------------------------------------------------- 1 | Hello, 2 | 3 | A password change request has been made for your account on JARR({{ plateform }}). 4 | If you have made that request please follow the link below to renew your 5 | account, otherwise, disregard this email. 
6 | 7 | {{ landing_url }} 8 | 9 | Regards, 10 | 11 | The JARR administrator 12 | -------------------------------------------------------------------------------- /jarr/templates/opml.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Feeds of {{ user.login }} 6 | {{ now.isoformat() }} 7 | {{ now.isoformat() }} 8 | {{ user.login }} 9 | {{ user.email }} 10 | 11 | {% for feed in feeds %} 12 | {% endfor %} 13 | 14 | -------------------------------------------------------------------------------- /jsclient/.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. 2 | 3 | # dependencies 4 | /node_modules 5 | /.pnp 6 | .pnp.js 7 | 8 | # testing 9 | /coverage 10 | 11 | # production 12 | /build 13 | 14 | # misc 15 | .DS_Store 16 | .env.local 17 | .env.development.local 18 | .env.test.local 19 | .env.production.local 20 | 21 | npm-debug.log* 22 | yarn-debug.log* 23 | yarn-error.log* 24 | -------------------------------------------------------------------------------- /jsclient/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "JARR-jsclient", 3 | "version": "0.1.0", 4 | "private": true, 5 | "dependencies": { 6 | "@material-ui/core": "4.*", 7 | "@material-ui/icons": "4.*", 8 | "@material-ui/lab": "4.0.0-alpha.61", 9 | "@reduxjs/toolkit": "1.*", 10 | "axios": "1.*", 11 | "moment": "2.*", 12 | "node-notifier": "8.*", 13 | "object-path": "0.11.5", 14 | "react": "16.*", 15 | "react-dev-utils": "11.*", 16 | "react-dom": "16.*", 17 | "react-redux": "7.*", 18 | "react-router-dom": "5.*", 19 | "react-scripts": "3.*", 20 | "react-window": "1.*", 21 | "redux": "4.*", 22 | "ssri": "8.*", 23 | "thunk": "^0.0.1", 24 | "typescript": "3.*" 25 | }, 26 | "scripts": { 27 | "start": "react-scripts start", 28 | "build": "react-scripts build", 29 | "test": 
"react-scripts test", 30 | "eject": "react-scripts eject" 31 | }, 32 | "eslintConfig": { 33 | "extends": "react-app" 34 | }, 35 | "browserslist": { 36 | "production": [ 37 | ">0.2%", 38 | "not dead", 39 | "not op_mini all" 40 | ], 41 | "development": [ 42 | "last 1 chrome version", 43 | "last 1 firefox version", 44 | "last 1 safari version" 45 | ] 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /jsclient/public/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jaesivsm/JARR/b7cafda5f813868ba8e2fb41136241407cd83dfa/jsclient/public/favicon.ico -------------------------------------------------------------------------------- /jsclient/public/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 13 | 17 | 18 | 27 | JARR 28 | 29 | 30 | 31 |
32 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /jsclient/public/manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "short_name": "JARR", 3 | "name": "JARR - Just Another Rss Reader", 4 | "icons": [ 5 | { 6 | "src": "favicon.ico", 7 | "sizes": "64x64 32x32 24x24 16x16", 8 | "type": "image/x-icon" 9 | } 10 | ], 11 | "start_url": ".", 12 | "display": "standalone", 13 | "theme_color": "#000000", 14 | "background_color": "#ffffff" 15 | } 16 | -------------------------------------------------------------------------------- /jsclient/public/robots.txt: -------------------------------------------------------------------------------- 1 | # https://www.robotstxt.org/robotstxt.html 2 | User-agent: * 3 | -------------------------------------------------------------------------------- /jsclient/src/Jarr.js: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | import PropTypes from "prop-types"; 3 | import { connect } from "react-redux"; 4 | import { BrowserRouter } from "react-router-dom"; 5 | 6 | import CssBaseline from "@material-ui/core/CssBaseline"; 7 | import { ThemeProvider } from "@material-ui/styles"; 8 | 9 | import {jarrTheme, jarrLoginTheme} from "./Jarr.theme"; 10 | import useStyles from "./Jarr.styles.js"; 11 | import NoAuth from "./features/noauth/NoAuth"; 12 | import TopMenu from "./features/topmenu/TopMenu"; 13 | import FeedList from "./features/feedlist/FeedList"; 14 | import EditPanel from "./features/editpanel/EditPanel"; 15 | import ClusterList from "./features/clusterlist/ClusterList"; 16 | 17 | function mapStateToProps(state) { 18 | return { isLogged: !!state.auth.accessToken, }; 19 | } 20 | 21 | function Jarr({ isLogged, isLeftMenuOpen }) { 22 | const classes = useStyles(); 23 | if (!isLogged) { 24 | return ( 25 | 26 | 27 |
28 | 29 | 30 |
31 |
32 |
33 | ); 34 | } 35 | return ( 36 | 37 |
38 | 39 | 40 | 41 | 42 | 43 |
44 |
45 | ); 46 | } 47 | 48 | Jarr.propTypes = { 49 | isLogged: PropTypes.bool.isRequired, 50 | }; 51 | 52 | export default connect(mapStateToProps)(Jarr); 53 | -------------------------------------------------------------------------------- /jsclient/src/Jarr.styles.js: -------------------------------------------------------------------------------- 1 | import { makeStyles, Theme, createStyles } from "@material-ui/core/styles"; 2 | 3 | export default makeStyles((theme: Theme) => 4 | createStyles({ 5 | root: { 6 | display: "flex", 7 | }, 8 | }), 9 | ); 10 | -------------------------------------------------------------------------------- /jsclient/src/Jarr.theme.js: -------------------------------------------------------------------------------- 1 | import { createMuiTheme } from "@material-ui/core/styles"; 2 | 3 | export const jarrColors = { 4 | primary : { 5 | main : "#5F9EA0", 6 | contrastText: "#ffffff" 7 | }, 8 | secondary: { 9 | main : "#6495ed", 10 | contrastText: "#ffffff" 11 | }, 12 | background: { 13 | default: "rgb(95,158,160, 0.6)" 14 | }, 15 | danger: { 16 | main: "#F08080", 17 | hover: "#CD5C5C", 18 | contrastText: "#ffffff" 19 | }, 20 | } 21 | 22 | export const jarrLoginTheme = createMuiTheme({ 23 | palette: { 24 | primary: jarrColors.primary, 25 | secondary: jarrColors.secondary, 26 | background: jarrColors.background, 27 | } 28 | }); 29 | 30 | export const jarrTheme = createMuiTheme({ 31 | palette: { 32 | primary: jarrColors.primary, 33 | secondary: jarrColors.secondary, 34 | } 35 | }); 36 | -------------------------------------------------------------------------------- /jsclient/src/app/store.js: -------------------------------------------------------------------------------- 1 | import { configureStore, Action } from "@reduxjs/toolkit"; 2 | import thunk, { ThunkAction }from "redux-thunk"; 3 | import authReducer from "../authSlice"; 4 | import noAuthReducer from "../features/noauth/noAuthSlice"; 5 | import feedsReducer from 
"../features/feedlist/slice"; 6 | import clustersReducer from "../features/clusterlist/slice"; 7 | import editReducer from "../features/editpanel/slice"; 8 | 9 | export default configureStore({ 10 | reducer: { 11 | auth: authReducer, 12 | noauth: noAuthReducer, 13 | feeds: feedsReducer, 14 | clusters: clustersReducer, 15 | edit: editReducer, 16 | }, 17 | middleware: [thunk], 18 | }); 19 | 20 | export type AppThunk = ThunkAction>; 21 | -------------------------------------------------------------------------------- /jsclient/src/authSlice.js: -------------------------------------------------------------------------------- 1 | import axios from "axios"; 2 | import { createSlice } from "@reduxjs/toolkit"; 3 | import { apiUrl } from "./const"; 4 | import { storageGet, storageSet, storageRemove } from "./storageUtils"; 5 | import { authError } from "./features/noauth/noAuthSlice"; 6 | 7 | const authSlice = createSlice({ 8 | name: "auth", 9 | initialState: { accessToken: null, 10 | accessTokenExpiresAt: null, 11 | refreshToken: storageGet("refreshToken", "local"), 12 | }, 13 | reducers: { 14 | tokenAcquired: (state, action) => { 15 | const accessToken = action.payload.data["access_token"]; 16 | const accessTokenExpiresAt = new Date(action.payload.data["access_token_expires_at"]).getTime(); 17 | const refreshToken = action.payload.data["refresh_token"]; 18 | if (refreshToken) { 19 | storageSet("refreshToken", refreshToken, "local"); 20 | return { ...state, accessToken, refreshToken, accessTokenExpiresAt }; 21 | } else { 22 | return { ...state, accessToken, accessTokenExpiresAt }; 23 | } 24 | }, 25 | purgeCredentials: () => { 26 | storageRemove("refreshToken", "local"); 27 | return { accessToken: null, refreshToken: null, accessTokenExpiresAt: null }; 28 | }, 29 | }, 30 | }); 31 | 32 | export const { tokenAcquired, purgeCredentials} = authSlice.actions; 33 | 34 | export default authSlice.reducer; 35 | 36 | export const doRetryOnTokenExpiration = async (payload, dispatch, 
getState) => { 37 | const now = new Date().getTime(); 38 | const state = getState(); 39 | if (!state.auth.accessTokenExpiresAt 40 | || state.auth.accessTokenExpiresAt <= now) { 41 | // token has expired, trying to refresh it 42 | try { 43 | const result = await axios({ 44 | method: "post", 45 | url: `${apiUrl}/auth/refresh`, 46 | headers: { "Authorization": state.auth.refreshToken } 47 | }); 48 | dispatch(tokenAcquired(result)); 49 | payload.headers = { "Authorization": result.data["access_token"] }; 50 | } catch (err) { // failed to refresh it, logging out 51 | dispatch(purgeCredentials()); 52 | dispatch(authError({ statusText: "EXPIRED" })); 53 | throw err; 54 | } 55 | } else { 56 | payload.headers = { "Authorization": state.auth.accessToken }; 57 | } 58 | try { 59 | return await axios(payload); 60 | } catch (err) { 61 | // token seems to have expired 62 | if (err.response && err.response.status === 401 63 | && err.response.data && err.response.data.message 64 | && err.response.data.message === "Invalid token, Signature has expired") { 65 | dispatch(purgeCredentials()); 66 | dispatch(authError({ statusText: "EXPIRED" })); 67 | } else { 68 | return err.response; 69 | } 70 | } 71 | }; 72 | -------------------------------------------------------------------------------- /jsclient/src/components/ClusterIcon.js: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | import PropTypes from "prop-types"; 3 | import { apiUrl } from "../const"; 4 | import qs from "qs"; 5 | import LinkIcon from "@material-ui/icons/Link"; 6 | import { makeStyles, Theme, createStyles } from "@material-ui/core/styles"; 7 | 8 | const iconStyle = makeStyles((theme: Theme) => 9 | createStyles({ 10 | clusterIcon: { 11 | maxWidth: 16, 12 | maxHeight: 16, 13 | margin: "8px 5px 8px 20px" 14 | }, 15 | }), 16 | ); 17 | 18 | function ClusterIcon({ iconUrl }) { 19 | const classes = iconStyle(); 20 | if (iconUrl) { 21 | return ; 23 | } 24 | 
return ; 26 | } 27 | 28 | ClusterIcon.propTypes = { 29 | iconUrl: PropTypes.string 30 | }; 31 | 32 | export default ClusterIcon; 33 | -------------------------------------------------------------------------------- /jsclient/src/components/FeedIcon.js: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | import PropTypes from "prop-types"; 3 | import { apiUrl } from "../const"; 4 | import qs from "qs"; 5 | import LinkIcon from "@material-ui/icons/Link"; 6 | import { makeStyles, Theme, createStyles } from "@material-ui/core/styles"; 7 | 8 | const iconStyle = makeStyles((theme: Theme) => 9 | createStyles({ 10 | feedIcon: { 11 | maxWidth: 16, 12 | maxHeight: 16, 13 | margin: "8px 5px 8px 20px", 14 | width: "100%", 15 | height: "auto", 16 | } 17 | }), 18 | ); 19 | 20 | function FeedIcon({ iconUrl }) { 21 | const classes = iconStyle(); 22 | if (iconUrl) { 23 | return ; 25 | } 26 | return ; 28 | } 29 | 30 | FeedIcon.propTypes = { 31 | iconUrl: PropTypes.string 32 | }; 33 | 34 | export default FeedIcon; 35 | -------------------------------------------------------------------------------- /jsclient/src/components/JarrIcon.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jaesivsm/JARR/b7cafda5f813868ba8e2fb41136241407cd83dfa/jsclient/src/components/JarrIcon.gif -------------------------------------------------------------------------------- /jsclient/src/const.js: -------------------------------------------------------------------------------- 1 | export const apiUrl = process.env.REACT_APP_API_URL ? 
process.env.REACT_APP_API_URL : "http://0.0.0.0:8000"; 2 | export const feedListWidth = 300; 3 | export const editPanelWidth = 500; 4 | export const pageLength = 30; 5 | -------------------------------------------------------------------------------- /jsclient/src/features/clusterlist/components/Article.js: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | import PropTypes from "prop-types"; 3 | import useMediaQuery from "@material-ui/core/useMediaQuery"; 4 | import { useTheme } from "@material-ui/core/styles"; 5 | import Typography from "@material-ui/core/Typography"; 6 | import Link from "@material-ui/core/Link"; 7 | import Divider from "@material-ui/core/Divider"; 8 | 9 | import makeStyles from "./style"; 10 | 11 | function Article({ article, forceShowTitle, hidden }) { 12 | const classes = makeStyles(); 13 | const theme = useTheme(); 14 | const splitedMode = useMediaQuery(theme.breakpoints.up("md")); 15 | let title, comments; 16 | if(forceShowTitle || splitedMode) { 17 | title = ( 18 | <> 19 | {article.title} 20 | 21 | 22 | ); 23 | }; 24 | if (article.comments) { 25 | comments = (

Comments 26 | 28 | {article.comments} 29 |

); 30 | } 31 | return ( 32 | 49 | ); 50 | } 51 | 52 | Article.propTypes = { 53 | article: PropTypes.shape({ 54 | link: PropTypes.string.isRequired, 55 | title: PropTypes.string.isRequired, 56 | content: PropTypes.string.isRequired, 57 | comments: PropTypes.string, 58 | }), 59 | hidden: PropTypes.bool, 60 | forceShowTitle: PropTypes.bool, 61 | }; 62 | Article.defaultProps = { 63 | hidden: false, 64 | forceShowTitle: false 65 | }; 66 | 67 | export default Article; 68 | -------------------------------------------------------------------------------- /jsclient/src/features/clusterlist/components/ProcessedContent.js: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | import PropTypes from "prop-types"; 3 | import Typography from "@material-ui/core/Typography"; 4 | import Link from "@material-ui/core/Link"; 5 | import Divider from "@material-ui/core/Divider"; 6 | 7 | import makeStyles from "./style"; 8 | 9 | function ProcessedContent({ content, hidden }) { 10 | const classes = makeStyles(); 11 | let title, titleDivider, link, comments, linksDivider, body; 12 | if (content.type === "fetched") { 13 | if (content.title) { 14 | title = ({content.title}); 15 | titleDivider = ; 16 | } 17 | if (content.comments) { 18 | comments = ( 19 |

20 | Comments 21 | 22 | {content.comments} 23 | 24 |

25 | ); 26 | } 27 | link = ( 28 |

29 | Link 30 | 31 | {content.link} 32 | 33 |

34 | ); 35 | body = ( 36 | 38 | ); 39 | linksDivider = ; 40 | } else if (content.type === "youtube") { 41 | body = ( 42 | 43 | 13 | 14 | """ 15 | 16 | 17 | class ArticleCleanerTest(unittest.TestCase): 18 | 19 | def setUp(self): 20 | self.sample = SAMPLE.split('\n') 21 | self.url = 'https://test.te' 22 | 23 | @patch('jarr.lib.url_cleaners.is_secure_served') 24 | def test_clean_clear(self, is_secure_served): 25 | is_secure_served.return_value = False 26 | result = clean_urls(SAMPLE, self.url).split('\n') 27 | self.assertEqual('', 28 | result[0]) 29 | self.assertEqual(self.sample[1], result[1]) # unchanged 30 | self.assertEqual(self.sample[2], result[2]) # unchanged 31 | self.assertEqual(self.sample[3], result[3]) # unchanged 32 | self.assertEqual(self.sample[4], result[4]) # unchanged 33 | self.assertEqual(self.sample[6], result[6]) # unchanged 34 | self.assertEqual('' % self.url, result[7]) 35 | 36 | @patch('jarr.lib.url_cleaners.is_secure_served') 37 | def test_clean_https(self, is_secure_served): 38 | is_secure_served.return_value = True 39 | result = clean_urls(SAMPLE, self.url).split('\n') 40 | self.assertEqual('', 41 | result[0]) 42 | self.assertEqual(self.sample[1], result[1]) # unchanged 43 | self.assertEqual(self.sample[2], result[2]) # unchanged 44 | self.assertEqual(self.sample[3], result[3]) # unchanged 45 | self.assertEqual( 46 | '