├── .dockerignore ├── .gitignore ├── Dockerfile ├── Dockerfile_requirements ├── LICENSE ├── Makefile ├── Makefile.docker ├── README.md ├── README_datamanagement.md ├── README_development.md ├── README_install.md ├── README_search.md ├── README_ui.md ├── analysis ├── github_notrot.md ├── github_rot.md ├── readme.md ├── substack_notrot.md └── substack_rot.md ├── backup.py ├── config.doxygen ├── convertjson2db.py ├── dataanalyzer.py ├── dbfilter.py ├── docker-compose.yml ├── docker-entrypoint.sh ├── docker └── raspberry │ ├── Dockerfile │ ├── docker-compose.yml │ ├── docker-entrypoint.sh │ └── requirements.txt ├── example_asyncio.py ├── example_compare_crawlers.py ├── example_page_crawler.py ├── examples ├── start_server.sh ├── startserver.bat ├── test.bat └── test.sh ├── exporter.py ├── images ├── django.kra ├── django.png ├── django_400.png ├── old-wild-west.jpg ├── old-wild-west.jpg~ ├── old-wild-west.kra ├── old-wild-west.kra~ └── sunset-5511638_1280.jpg ├── importer.py ├── init_browser_setup.json ├── init_sources_crawler.json ├── init_sources_music.json ├── init_sources_news.json ├── linklibrary ├── __init__.py ├── __init__celery.py ├── celery_example.py ├── settings_template_postgres.py ├── settings_template_postgres_celery.py ├── settings_template_sqlite.py ├── settings_template_sqlite_celery.py ├── urls_example.py └── wsgi.py ├── manage.py ├── page_props.py ├── poetry.lock ├── pyproject.toml ├── rabbitmq.conf ├── requirements.txt ├── rsshistory ├── __init__.py ├── admin.py ├── apps.py ├── configuration.py ├── controllers │ ├── __init__.py │ ├── backgroundjob.py │ ├── comments.py │ ├── domains.py │ ├── entries.py │ ├── entriesutils.py │ ├── entrycleanup.py │ ├── entrydatabuilder.py │ ├── entryupdater.py │ ├── entrywrapper.py │ ├── modelfiles.py │ ├── searchengines.py │ ├── sources.py │ ├── system.py │ └── wizards.py ├── datawriter.py ├── forms.py ├── management │ ├── __init__.py │ └── commands │ │ ├── __init__.py │ │ └── threadprocessor.py ├── models │ ├── 
__init__.py │ ├── apikeys.py │ ├── backgroundjob.py │ ├── blockentry.py │ ├── browser.py │ ├── credentials.py │ ├── domains.py │ ├── entries.py │ ├── entryrules.py │ ├── export.py │ ├── gateway.py │ ├── keywords.py │ ├── modelfiles.py │ ├── readlater.py │ ├── readmarkers.py │ ├── searchview.py │ ├── social.py │ ├── sources.py │ ├── system.py │ ├── useractions.py │ └── userhistory.py ├── pluginsources │ ├── __init__.py │ ├── codeprojectplugin.py │ ├── emailsourceplugin.py │ ├── hackernewsparserplugin.py │ ├── rssparserplugin.py │ ├── sourcecontrollerbuilder.py │ ├── sourcegenericplugin.py │ ├── sourcejsonplugin.py │ ├── sourceparseplugin.py │ ├── sourceplugininterface.py │ ├── sourcerssplugin.py │ ├── sourceurlinterface.py │ ├── spotifyplugin.py │ └── tvn24plugin.py ├── pluginurl │ ├── __init__.py │ ├── entryurlinterface.py │ └── urlhandler.py ├── queryfilters.py ├── repositoryfactory.py ├── serializers │ ├── __init__.py │ ├── domainexporter.py │ ├── entriesexporter.py │ ├── entrydailydataexpoter.py │ ├── entrynotimedataexporter.py │ ├── entryyeardataexporter.py │ ├── instanceimporter.py │ ├── jsonimporter.py │ ├── keywordexporter.py │ ├── servicedatamarginaliacrawleroutput.py │ └── sourcesserializer.py ├── static │ ├── bootstrap │ │ ├── bootstrap.bundle5.3.3.min.js │ │ ├── bootstrap5.3.3.min.css │ │ ├── jquery-3.7.1.min.js │ │ └── popper1.14.7.min.js │ └── rsshistory │ │ ├── config.js │ │ ├── css │ │ ├── styles-mobile.css_style-dark.css │ │ ├── styles-mobile.css_style-light.css │ │ ├── styles.css_style-dark.css │ │ └── styles.css_style-light.css │ │ ├── entries_library.js │ │ ├── handlers.js │ │ ├── icons │ │ ├── account.png │ │ ├── archive.org.ico │ │ ├── archive.org.save.ico │ │ ├── favicon.ico │ │ ├── icons8-add-link-96.png │ │ ├── icons8-back-100.png │ │ ├── icons8-bookmark-100.png │ │ ├── icons8-bookmarks-100.png │ │ ├── icons8-broadcast-100.png │ │ ├── icons8-broadcast-add-100.png │ │ ├── icons8-channel-add-96.png │ │ ├── icons8-configuration-67.png │ │ ├── 
icons8-database-export-100.png │ │ ├── icons8-down-100.png │ │ ├── icons8-download-96.png │ │ ├── icons8-download-music-96.png │ │ ├── icons8-download-page-96.png │ │ ├── icons8-download-video-96.png │ │ ├── icons8-drop-down-100.png │ │ ├── icons8-edit-100.png │ │ ├── icons8-external-link-128.png │ │ ├── icons8-ftp-96.png │ │ ├── icons8-heart-monitor-100.png │ │ ├── icons8-hide-96.png │ │ ├── icons8-home-96.png │ │ ├── icons8-letters-96.png │ │ ├── icons8-link-90.png │ │ ├── icons8-locked-100.png │ │ ├── icons8-login-100.png │ │ ├── icons8-logout-100.png │ │ ├── icons8-music-96.png │ │ ├── icons8-nas-96.png │ │ ├── icons8-new-100.png │ │ ├── icons8-not-bookmark-100.png │ │ ├── icons8-paper-64.png │ │ ├── icons8-pause-squared-100.png │ │ ├── icons8-play-100.png │ │ ├── icons8-plus-100.png │ │ ├── icons8-radar-64.png │ │ ├── icons8-rate-100.png │ │ ├── icons8-save-100.png │ │ ├── icons8-schedule-100.png │ │ ├── icons8-search-100.png │ │ ├── icons8-show-100.png │ │ ├── icons8-skull-100.png │ │ ├── icons8-start-100.png │ │ ├── icons8-stop-100.png │ │ ├── icons8-tags-100.png │ │ ├── icons8-translate-128.png │ │ ├── icons8-trash-100.png │ │ ├── icons8-trash-multiple-100.png │ │ ├── icons8-tv-show-32.png │ │ ├── icons8-unlocked-96.png │ │ ├── icons8-up-100.png │ │ ├── icons8-update-100.png │ │ ├── icons8-update-skull-100.png │ │ ├── icons8-video-96.png │ │ ├── icons8-view-details-100.png │ │ ├── icons8-www-64.png │ │ ├── icons8-youtube-music-96.png │ │ └── rss.gif │ │ ├── images │ │ ├── piesel.jpg │ │ └── sign-304093_640.png │ │ ├── library.js │ │ ├── project_library.js │ │ └── test │ │ ├── test.entries.js │ │ └── test.sh ├── tasks.py ├── templates │ ├── auth │ │ └── user_list.html │ ├── registration │ │ ├── login.html │ │ └── logout.html │ ├── robots.txt │ └── rsshistory │ │ ├── about.html │ │ ├── about_element.html │ │ ├── admin_page.html │ │ ├── apikeys_list.html │ │ ├── applogging_list.html │ │ ├── appusers.html │ │ ├── backgroundjob_list.html │ │ ├── 
backgroundjob_list__script.js │ │ ├── backgroundjob_list_actions.html │ │ ├── base_footer.html │ │ ├── base_generic.html │ │ ├── base_head.html │ │ ├── base_menu.html │ │ ├── base_script.js │ │ ├── blockentry_list.html │ │ ├── blockentrylist_list.html │ │ ├── browser_list.html │ │ ├── categories_list.html │ │ ├── credentials_detail.html │ │ ├── credentials_list.html │ │ ├── data_errors.html │ │ ├── dataexport_detail.html │ │ ├── dataexport_list.html │ │ ├── domain_category_list.html │ │ ├── domain_help_element.html │ │ ├── domains_detail.html │ │ ├── domains_list.html │ │ ├── domains_list__script.js │ │ ├── entries_import_summary.html │ │ ├── entries_untagged.html │ │ ├── entry__add_simple.html │ │ ├── entry__add_simple__script.js │ │ ├── entry_add__form.html │ │ ├── entry_added.html │ │ ├── entry_detail.html │ │ ├── entry_detail__comments.html │ │ ├── entry_detail__dynamic.html │ │ ├── entry_detail__frame.html │ │ ├── entry_detail__script.js │ │ ├── entry_detail__source.html │ │ ├── entry_detail__tag_form.html │ │ ├── entry_detail__tags.html │ │ ├── entry_detail__vote_form.html │ │ ├── entry_list.html │ │ ├── entry_list__actions.html │ │ ├── entry_list__script.js │ │ ├── entry_list_display_element.html │ │ ├── entry_thumbnail_element.html │ │ ├── entryrules_detail.html │ │ ├── entryrules_list.html │ │ ├── form_basic.html │ │ ├── form_configuration.html │ │ ├── form_filter_element.html │ │ ├── form_filter_oneliner_element.html │ │ ├── form_multiline_element.html │ │ ├── form_oneliner.html │ │ ├── form_oneliner_element.html │ │ ├── form_search_init.html │ │ ├── form_search_omni.html │ │ ├── form_search_omni_element.html │ │ ├── form_search_syntax_element.html │ │ ├── form_source_add_simple.html │ │ ├── gateways.html │ │ ├── go_back.html │ │ ├── icon_add.html │ │ ├── icon_disable.html │ │ ├── icon_edit.html │ │ ├── icon_enable.html │ │ ├── icon_external.html │ │ ├── icon_link.html │ │ ├── icon_remove.html │ │ ├── icon_source.html │ │ ├── icon_update.html │ │ ├── 
import_internetarchive.html │ │ ├── index.html │ │ ├── info_users.html │ │ ├── javascript_list_utilities.js │ │ ├── keywords_list.html │ │ ├── missing_rights.html │ │ ├── modelfiles_list.html │ │ ├── page_show_properties.html │ │ ├── page_show_properties__script.js │ │ ├── pagination.html │ │ ├── readlater_list.html │ │ ├── readlater_list__script.js │ │ ├── search_engines.html │ │ ├── search_places_element.html │ │ ├── searchview_detail.html │ │ ├── searchview_list.html │ │ ├── source__add_simple.html │ │ ├── source__add_simple__script.js │ │ ├── source_add__form.html │ │ ├── source_added.html │ │ ├── source_client_reader.html │ │ ├── source_help_element.html │ │ ├── sourcedatacontroller_detail.html │ │ ├── sourcedatacontroller_detail_buttons.html │ │ ├── sources_import_summary.html │ │ ├── sources_list.html │ │ ├── sources_list__actions.html │ │ ├── sources_list__script.js │ │ ├── sources_list__standard.html │ │ ├── summary_present.html │ │ ├── system_status.html │ │ ├── tags_list.html │ │ ├── urls.js │ │ ├── user_config.html │ │ ├── user_configs.html │ │ ├── user_personal.html │ │ ├── userbrowsehistory_list.html │ │ ├── userbrowsehistory_list__script.js │ │ ├── usercommentscontroller_list.html │ │ ├── usersearchhistory_list.html │ │ ├── usertags_list.html │ │ ├── wizard_setup.html │ │ ├── wizard_setup_init.html │ │ └── wizard_setup_init.js ├── tests │ ├── __init__.py │ ├── fake │ │ ├── __init__.py │ │ ├── codeproject.py │ │ ├── firebog.py │ │ ├── geekwirecom.py │ │ ├── githubcom.py │ │ ├── hackernews.py │ │ ├── instance.py │ │ ├── reddit.py │ │ ├── remoteserver.py │ │ ├── returndislike.py │ │ ├── robotstxtcom.py │ │ ├── thehill.py │ │ ├── warhammercommunity.py │ │ └── youtube.py │ ├── fakeinternet.py │ ├── fakeinternetdata.py │ ├── test_applogging.py │ ├── test_backgroundjobcontroller.py │ ├── test_blockentry.py │ ├── test_browser.py │ ├── test_converters.py │ ├── test_dataexport.py │ ├── test_datawriter.py │ ├── test_dateutils.py │ ├── test_domainscontroller.py 
│ ├── test_entriescleanup.py │ ├── test_entrydatabuilder.py │ ├── test_entrypreviewbuilder.py │ ├── test_entryrules.py │ ├── test_entryscanner.py │ ├── test_entryupdater.py │ ├── test_entryurlinterface.py │ ├── test_entrywrapper.py │ ├── test_gateway.py │ ├── test_gitrepository.py │ ├── test_instanceimporter.py │ ├── test_keywords.py │ ├── test_linkdatacontroller.py │ ├── test_modelfiles.py │ ├── test_omnisearch.py │ ├── test_pagedisplay.py │ ├── test_queryfilters.py │ ├── test_readmarkers.py │ ├── test_searchengines.py │ ├── test_searchviews.py │ ├── test_serializers.py │ ├── test_serializers_entrydailydataexpoter.py │ ├── test_serializers_entryyeardataexporter.py │ ├── test_serializers_jsonimporter.py │ ├── test_serializers_mainexporter.py │ ├── test_service_internetarchive.py │ ├── test_service_translate.py │ ├── test_social.py │ ├── test_sourcedatabuilder.py │ ├── test_sourcedatacontroller.py │ ├── test_sourceexporthistory.py │ ├── test_sourceplugins_json.py │ ├── test_sourceplugins_other.py │ ├── test_sourceplugins_parse.py │ ├── test_sourceplugins_rss.py │ ├── test_sourceurlinterface.py │ ├── test_systemoperation.py │ ├── test_threadhandlers.py │ ├── test_threadprocessors.py │ ├── test_updatemgr.py │ ├── test_urlhandler.py │ ├── test_useractions.py │ ├── test_userconfig.py │ ├── test_userhistory.py │ ├── test_views.py │ ├── test_views_apikeys.py │ ├── test_views_applogging.py │ ├── test_views_backgroundjobs.py │ ├── test_views_base.py │ ├── test_views_blocklists.py │ ├── test_views_comments.py │ ├── test_views_credentials.py │ ├── test_views_data_export.py │ ├── test_views_domains.py │ ├── test_views_entries.py │ ├── test_views_entryrules.py │ ├── test_views_export.py │ ├── test_views_modelfiles.py │ ├── test_views_readlater.py │ ├── test_views_readmarkers.py │ ├── test_views_searchviews.py │ ├── test_views_social.py │ ├── test_views_sources.py │ ├── test_views_system.py │ ├── test_views_tags.py │ ├── test_views_tools.py │ ├── test_views_userhistory.py │ ├── 
test_views_users.py │ └── test_views_votes.py ├── threadhandlers.py ├── threadprocessors.py ├── updatemgr.py ├── urls.py ├── views.py ├── viewspkg │ ├── __init__.py │ ├── apikeys.py │ ├── backgroundjobs.py │ ├── blockentry.py │ ├── browsers.py │ ├── comments.py │ ├── credentials.py │ ├── custom.py │ ├── domains.py │ ├── entries.py │ ├── entryrules.py │ ├── export.py │ ├── keywords.py │ ├── modelfiles.py │ ├── plugins │ │ ├── __init__.py │ │ ├── entrygenericplugin.py │ │ ├── entryodyseeplugin.py │ │ ├── entrypreviewbuilder.py │ │ └── entryyoutubeplugin.py │ ├── readlater.py │ ├── readmarkers.py │ ├── searchviews.py │ ├── social.py │ ├── sources.py │ ├── system.py │ ├── tools.py │ ├── useractions.py │ ├── userhistory.py │ └── users.py └── webtools │ ├── __init__.py │ ├── crawlers │ ├── __init__.py │ ├── crawlerinterface.py │ └── crawlers.py │ ├── crawlerscript.py │ ├── handlers │ ├── __init__.py │ ├── defaulturlhandler.py │ ├── handlerchannelodysee.py │ ├── handlerchannelyoutube.py │ ├── handlerhttppage.py │ ├── handlerinterface.py │ ├── handlers.py │ ├── handlervideoodysee.py │ └── handlervideoyoutube.py │ ├── url.py │ ├── webconfig.py │ └── webtools.py ├── screenshots ├── admin_view.png ├── backgroundjobs_view.PNG ├── browser_list.PNG ├── browsers.PNG ├── configuration_form.PNG ├── dark_theme.PNG ├── domains.PNG ├── entries_list_search_engine.PNG ├── entries_list_standard.PNG ├── entries_list_youtube.PNG ├── entry_details.PNG ├── entry_new.PNG ├── entry_new_simple.PNG ├── index.PNG ├── keywords_view.PNG ├── logs_view.PNG ├── page_properties.PNG ├── search_engines.PNG ├── search_form.PNG ├── server_status.PNG ├── source_details.PNG ├── source_list.PNG ├── source_new.PNG ├── tags_view.PNG ├── tools.PNG └── user_configuration_view.PNG ├── todo.md ├── utils ├── __init__.py ├── alchemysearch.py ├── basictypes.py ├── controllers │ ├── __init__.py │ ├── browser.py │ ├── controllers.py │ ├── entries.py │ ├── sources.py │ ├── sourcesreader.py │ └── system.py ├── 
dateutils.py ├── inputcontent.py ├── logger.py ├── omnisearch.py ├── programwrappers │ ├── __init__.py │ ├── ffmpeg.py │ ├── id3v2.py │ ├── vlc.py │ ├── wget.py │ ├── ytdlp.py │ └── ytdownloader.py ├── reflected.py ├── repositoryinterface.py ├── serializers │ ├── __init__.py │ ├── converters.py │ ├── entriesexporter.py │ ├── htmlexporter.py │ ├── jsonimporter.py │ ├── pagedisplay.py │ └── youtubelinkjson.py ├── services │ ├── __init__.py │ ├── emailreader.py │ ├── gitrepository.py │ ├── internetarchive.py │ ├── openrss.py │ ├── servicedatareadinglist.py │ ├── translate.py │ ├── validators.py │ └── waybackmachine.py ├── sqlmodel.py └── systemmonitoring.py └── workspace.py /.dockerignore: -------------------------------------------------------------------------------- 1 | Dockerfile 2 | .dockerignore 3 | .git 4 | .gitignore 5 | .gitattributes 6 | -------------------------------------------------------------------------------- /Dockerfile_requirements: -------------------------------------------------------------------------------- 1 | # Adapted from https://gist.github.com/soof-golan/6ebb97a792ccd87816c0bda1e6e8b8c2 2 | # This is minimal startup of django app, which uses SQL lite, and no background task like celery 3 | 4 | FROM python:3.9 as python-base 5 | 6 | # Create stage for Poetry installation 7 | FROM python-base as poetry-base 8 | 9 | # Creating a virtual environment just for poetry and install it with pip 10 | RUN python3 -m venv $POETRY_VENV \ 11 | && $POETRY_VENV/bin/pip install -U pip setuptools 12 | 13 | # Create a new stage from the base python image 14 | FROM python-base as example-app 15 | 16 | # Add Poetry to PATH 17 | ENV PATH="${PATH}:${POETRY_VENV}/bin" 18 | 19 | WORKDIR /app 20 | 21 | # Copy Dependencies 22 | COPY requirements.txt ./ 23 | 24 | RUN pip install -r requirements.txt 25 | 26 | # spacy needs a file to be downloaded 27 | RUN python3 -m spacy download en_core_web_sm 28 | # playwright needs a browser 29 | RUN playwright install 30 | 31 | 
# Copy Application 32 | COPY . /app 33 | 34 | # Copy the custom settings template 35 | COPY ./linklibrary/settings_template_postgres_celery.py /app/linklibrary/settings.py 36 | RUN mkdir -p /app/linklibrary/rsshistory/migrations 37 | RUN touch /app/linklibrary/rsshistory/migrations/__init__.py 38 | 39 | # TODO Copy chromedriver to /usr/local/bin 40 | RUN apt-get -y update && apt-get -y upgrade && apt-get install -y --no-install-recommends ffmpeg id3v2 wget xvfb 41 | 42 | # Expose the port that Django will run on 43 | EXPOSE 8000 44 | 45 | RUN ["chmod", "+x", "/app/docker-entrypoint.sh"] 46 | 47 | # TODO how to kill server? 48 | ENTRYPOINT ["/app/docker-entrypoint.sh"] 49 | 50 | # Run the Django application using Poetry 51 | #CMD ["poetry", "run", "python", "manage.py", "runserver", "0.0.0.0:8000"] 52 | #CMD ["/app/docker-entrypoint.sh"] 53 | 54 | # can be further enhanced with 55 | # https://testdriven.io/blog/dockerizing-django-with-postgres-gunicorn-and-nginx/ 56 | # https://stackoverflow.com/questions/33992867/how-do-you-perform-django-database-migrations-when-using-docker-compose 57 | -------------------------------------------------------------------------------- /README_datamanagement.md: -------------------------------------------------------------------------------- 1 | This document provides information how data can be backed up, and restored. 2 | 3 | To backup, and restore data use backup.py script. It can do both 4 | 5 | # Backup 6 | 7 | To backup using postgreSQL custom format (fastest option). 8 | 9 | ``` 10 | poetry run python backup.py -U -d -p -b -f custom 11 | ``` 12 | 13 | To backup using postgreSQL sqlite format. 
14 | 15 | ``` 16 | poetry run python backup.py -U -d -p -b -f sqlite 17 | ``` 18 | 19 | Can be performed for individual workspace 20 | 21 | ``` 22 | poetry run python backup.py -U -d -p -b -f sqlite -w rsshistory 23 | 24 | ``` 25 | 26 | # Restore 27 | 28 | Fastest option - use custom format 29 | 30 | ``` 31 | poetry run python backup.py -U -d -p -r -f custom -w rsshistory 32 | ``` 33 | 34 | Can be done with sqlite table. 35 | 36 | ``` 37 | poetry run python backup.py -U -d -p -r -f sqlite -w rsshistory 38 | ``` 39 | 40 | The append switch can be supplied to avoid cleaning the table at start 41 | 42 | ``` 43 | poetry run python backup.py -U -d -p -r -f sqlite --append -w rsshistory 44 | ``` 45 | 46 | # Test connection 47 | ``` 48 | psql -U -d -p -h 127.0.0.1 49 | ``` 50 | 51 | # Tools 52 | 53 | Tables can be reindexed 54 | ``` 55 | poetry run python backup.py -U -d -p --reindex 56 | ``` 57 | 58 | Sequences on ids can be reset 59 | ``` 60 | poetry run python backup.py -U -d -p --sequence-update 61 | ``` 62 | -------------------------------------------------------------------------------- /analysis/readme.md: -------------------------------------------------------------------------------- 1 | # Analysis results 2 | 3 | Analysis is performed on my personal link archive instance. 4 | 5 | Most of the links are provided by Hacker News and CodeProject. Most RSS sources provide data for the RSS source domain. HN and CodeProject provide links outside of their domains. 
6 | 7 | - [list of personal sites](https://raw.githubusercontent.com/rumca-js/RSS-Link-Database-2023/master/domains_personal.json) 8 | - github pages that have rotted [https://raw.githubusercontent.com/rumca-js/Django-link-archive/main/analysis/github_rot.md](https://raw.githubusercontent.com/rumca-js/Django-link-archive/main/analysis/github_rot.md) 9 | - github pages that have not rotted [https://raw.githubusercontent.com/rumca-js/Django-link-archive/main/analysis/github_notrot.md](https://raw.githubusercontent.com/rumca-js/Django-link-archive/main/analysis/github_notrot.md) 10 | - substack pages that have rotted [https://raw.githubusercontent.com/rumca-js/Django-link-archive/main/analysis/substack_rot.md](https://raw.githubusercontent.com/rumca-js/Django-link-archive/main/analysis/substack_rot.md) 11 | - substack pages that have not rotted [https://raw.githubusercontent.com/rumca-js/Django-link-archive/main/analysis/substack_notrot.md](https://raw.githubusercontent.com/rumca-js/Django-link-archive/main/analysis/substack_notrot.md) 12 | 13 | # Ideas 14 | 15 | We can do much more! We can check and analyse much more! 16 | 17 | - analysis of links: how many old links are no longer valid 18 | - analysis of RSS source: how often it publishes data 19 | - analysis of RSS source: what kind of data it produces, is it reliable 20 | - analysis of RSS source: is it a content farm, does it contain many links outside of the domain? 21 | - analysis of domains: is the domain correctly configured? 22 | - analysis of topics: who was the first to report on a certain topic 23 | - analysis of topics: which source uses which words? For example, it seems that left-leaning sites and right-leaning sites have a different vocabulary. They use different kinds of words and ideas. 
With this file history, you can analyze which sites have which ideas 24 | -------------------------------------------------------------------------------- /analysis/substack_rot.md: -------------------------------------------------------------------------------- 1 | 2 | # All substack domains that have rotted (2023.09.01) 3 | 4 | search equation = "domain = substack.com & dead = 1" 5 | 6 | - [DEAD] askjerry.substack.com Not Found 7 | - [DEAD] codeofhonor.substack.com Not Found 8 | - [DEAD] gaylelaakmann.substack.com Not Found 9 | - [DEAD] satfax.substack.com Not Found 10 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.7' 2 | services: 3 | djangolinkarchive: 4 | image: rozbujnik/django-link-archive 5 | container_name: djangolinkarchive 6 | build: 7 | context: . 8 | dockerfile: Dockerfile 9 | ports: 10 | - 8000:8000 11 | depends_on: 12 | dbserver: 13 | condition: service_healthy 14 | environment: 15 | # could be localhost 16 | DB_SERVER: dbserver 17 | DB_DB: control 18 | DB_USER: power 19 | DB_PASSWORD: notexample 20 | ALLOWED_IP: "192.168.0.1" 21 | DJANGO_SUPERUSER_USERNAME: admin 22 | DJANGO_SUPERUSER_PASSWORD: admin 23 | DJANGO_INITIALIZE_SOURCES: 1 24 | CRAWLER_BUDDY_SERVER: crawlerbuddy 25 | CRAWLER_BUDDY_PORT: 3000 26 | SECRET_KEY: "xxxxxxxxxxxxx" 27 | DEBUG: 1 28 | networks: 29 | - shared_network 30 | 31 | dbserver: 32 | image: postgres 33 | restart: always 34 | environment: 35 | POSTGRES_DB: control 36 | POSTGRES_USER: power 37 | POSTGRES_PASSWORD: notexample 38 | healthcheck: 39 | test: ["CMD-SHELL", "pg_isready -U power -d control"] 40 | interval: 5s 41 | timeout: 5s 42 | retries: 5 43 | networks: 44 | - shared_network 45 | 46 | crawlerbuddy: 47 | image: rozbujnik/crawler-buddy 48 | container_name: crawlerbuddy 49 | ports: 50 | - 3000:3000 51 | environment: 52 | CRAWLER_BUDDY_PORT: 3000 53 | networks: 54 | - 
shared_network 55 | 56 | networks: 57 | shared_network: 58 | driver: bridge 59 | -------------------------------------------------------------------------------- /docker/raspberry/Dockerfile: -------------------------------------------------------------------------------- 1 | # Adapted from https://gist.github.com/soof-golan/6ebb97a792ccd87816c0bda1e6e8b8c2 2 | # This is minimal startup of django app, which uses SQL lite, and no background task like celery 3 | 4 | FROM python:3.9 as python-base 5 | 6 | FROM python-base as poetry-base 7 | 8 | # Creating a virtual environment just for poetry and install it with pip 9 | RUN python3 -m venv $POETRY_VENV \ 10 | && $POETRY_VENV/bin/pip install -U pip setuptools 11 | 12 | # Create a new stage from the base python image 13 | FROM python-base as example-app 14 | 15 | # Add Poetry to PATH 16 | ENV PATH="${PATH}:${POETRY_VENV}/bin" 17 | 18 | WORKDIR /app 19 | 20 | COPY requirements.txt ./ 21 | 22 | RUN pip install -r requirements.txt 23 | 24 | # spacy needs a file to be downloaded 25 | # RUN python3 -m spacy download en_core_web_sm 26 | # playwright needs a browser 27 | # RUN playwright install 28 | 29 | # Copy Application 30 | COPY . /app 31 | 32 | # Copy the custom settings template 33 | COPY ./linklibrary/settings_template_postgres_celery.py /app/linklibrary/settings.py 34 | RUN mkdir -p /app/linklibrary/rsshistory/migrations 35 | RUN touch /app/linklibrary/rsshistory/migrations/__init__.py 36 | 37 | # TODO Copy chromedriver to /usr/local/bin 38 | RUN apt-get -y update && apt-get -y upgrade && apt-get install -y --no-install-recommends ffmpeg id3v2 wget xvfb python3-lxml python3-psycopg2 39 | 40 | # Expose the port that Django will run on 41 | EXPOSE 8000 42 | 43 | RUN ["chmod", "+x", "/app/docker-entrypoint.sh"] 44 | 45 | # TODO how to kill server? 
46 | ENTRYPOINT ["/app/docker-entrypoint.sh"] 47 | 48 | # Run the Django application using Poetry 49 | #CMD ["poetry", "run", "python", "manage.py", "runserver", "0.0.0.0:8000"] 50 | #CMD ["/app/docker-entrypoint.sh"] 51 | 52 | # can be further enhanced with 53 | # https://testdriven.io/blog/dockerizing-django-with-postgres-gunicorn-and-nginx/ 54 | # https://stackoverflow.com/questions/33992867/how-do-you-perform-django-database-migrations-when-using-docker-compose 55 | -------------------------------------------------------------------------------- /docker/raspberry/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.7' 2 | services: 3 | djangolinkarchive: 4 | image: rozbujnik/django-link-archive 5 | container_name: djangolinkarchive 6 | build: 7 | context: . 8 | dockerfile: Dockerfile 9 | ports: 10 | - 9092:8000 11 | depends_on: 12 | dbserver: 13 | condition: service_healthy 14 | rabbitmq: 15 | condition: service_started 16 | environment: 17 | # could be localhost 18 | RABBIT_SERVER: rabbitmq 19 | MEMCACHED_SERVER: memcached 20 | MEMCACHED_PORT: 11211 21 | DB_SERVER: dbserver 22 | DB_DB: control 23 | DB_USER: power 24 | DB_PASSWORD: notexample 25 | ALLOWED_IP: "192.168.0.1" 26 | DJANGO_SUPERUSER_USERNAME: admin 27 | DJANGO_SUPERUSER_PASSWORD: admin 28 | DJANGO_INITIALIZE_SOURCES: 1 29 | SECRET_KEY: "xxxxxxxxxxxxx" 30 | DEBUG: 1 31 | networks: 32 | - shared_network 33 | 34 | rabbitmq: 35 | #image: rabbitmq 36 | image: rabbitmq:3-management 37 | ports: 38 | - 5672:5672 39 | volumes: 40 | - ./rabbitmq.conf:/etc/rabbitmq/rabbitmq.conf 41 | depends_on: 42 | dbserver: 43 | condition: service_healthy 44 | networks: 45 | - shared_network 46 | 47 | memcached: 48 | image: memcached:latest 49 | ports: 50 | - "11211:11211" 51 | networks: 52 | - shared_network 53 | 54 | dbserver: 55 | image: postgres 56 | restart: always 57 | environment: 58 | POSTGRES_DB: control 59 | POSTGRES_USER: power 60 | 
POSTGRES_PASSWORD: notexample 61 | healthcheck: 62 | test: ["CMD-SHELL", "pg_isready -U power -d control"] 63 | interval: 5s 64 | timeout: 5s 65 | retries: 5 66 | networks: 67 | - shared_network 68 | 69 | networks: 70 | shared_network: 71 | driver: bridge 72 | -------------------------------------------------------------------------------- /docker/raspberry/docker-entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | 4 | if ! test -f /app/linklibrary/initialized.txt; then 5 | mkdir -p /app/linklibrary/rsshistory/migrations 6 | touch /app/linklibrary/rsshistory/migrations/__init__.py 7 | 8 | # Collect static files 9 | echo "Collect static files" 10 | python3 manage.py collectstatic --noinput 11 | 12 | # Apply database migrations 13 | echo "Apply database migrations" 14 | python3 manage.py makemigrations 15 | python3 manage.py migrate auth 16 | python3 manage.py migrate django_celery_results 17 | echo "Apply database clean migrate" 18 | python3 manage.py migrate 19 | echo "Apply database clean migrate - rsync" 20 | python3 manage.py migrate --run-syncdb 21 | 22 | echo "Creating user" 23 | if [ -z "${DJANGO_SUPERUSER_USERNAME}" ]; then 24 | FOO_USER='admin' 25 | else 26 | FOO_USER=${DJANGO_SUPERUSER_USERNAME} 27 | fi 28 | 29 | if [ -z "${DJANGO_SUPERUSER_PASSWORD}" ]; then 30 | export DJANGO_SUPERUSER_PASSWORD='admin' 31 | else 32 | export FOO_PASSWORD=${DJANGO_SUPERUSER_PASSWORD} 33 | fi 34 | 35 | if [ -z "${DJANGO_SUPERUSER_EMAIL}" ]; then 36 | export DJANGO_SUPERUSER_EMAIL='no@email.com' 37 | fi 38 | 39 | python3 manage.py createsuperuser \ 40 | --noinput \ 41 | --username $FOO_USER \ 42 | --email "${DJANGO_SUPERUSER_EMAIL}" \ 43 | 44 | fi 45 | 46 | touch /app/linklibrary/initialized.txt 47 | mkdir -p /app/linklibrary/lesson-11/broker/queue 48 | 49 | echo "Starting celery" 50 | rm -rf celerybeat-schedule.db 51 | celery -A linklibrary beat -l INFO & 52 | celery -A linklibrary worker -l INFO 
--concurrency=4 --max-memory-per-child=100000 & 53 | 54 | echo "Starting web server" 55 | python3 manage.py runserver 0.0.0.0:8000 56 | 57 | echo "Now connect to docker, and define super super" 58 | -------------------------------------------------------------------------------- /docker/raspberry/requirements.txt: -------------------------------------------------------------------------------- 1 | Django >= "4.2.17" 2 | django-user-agents >= "0.4.0" 3 | django-celery-results >= "2.5.1" 4 | pymemcache >= "4.0.0" 5 | celery >= "5.4.0" 6 | # feedparser >= "6.0.10" 7 | python-dateutil >= "2.8.2" 8 | yt-dlp >= "2024.12.6" 9 | waybackpy >= "3.0.6" 10 | sqlalchemy >= "2.0.34" 11 | tldextract >= "5.1.2" 12 | beautifulsoup4 >= "4.12.3" 13 | # sympy >= "1.13.2" 14 | pytz >= "2024.2" 15 | psycopg2-binary 16 | lxml 17 | 18 | # [optional] - at least one of methods need to be running 19 | # example: crawlerrequests.py 20 | requests >= "2.32.3" 21 | 22 | # [optional] 23 | # stealth_requests 24 | 25 | # [optional] use things below if you want to use selenium 26 | # example: crawlerseleniumfull.py, crawlerseleniumheadless.py 27 | selenium>="4.27.1" 28 | # chardet >= "5.2.0" 29 | pyvirtualdisplay >= "3.0" 30 | 31 | # [optional] use if you plan to use it 32 | # Note: does not work on raspberry pi 33 | # example: crawlerseleniumundetected.py 34 | # undetected-chromedriver >= "3.5.5" 35 | 36 | # [optional] use things below if you want to use crawlee 37 | # example: crawleebeautifulsoup.py, crawleeplaywright.py 38 | # crawlee>="0.4.5" 39 | 40 | # [optional] use things below if you want to use seleniumbase 41 | # example: crawlerseleniumbase.py 42 | # seleniumbase="^4.30.3" 43 | 44 | # [optional] use things below if you want to use botasaurus 45 | # example: todo 46 | # botasaurus-driver 47 | # botasaurus="*" 48 | 49 | # [optional] use things below if you want to use scrapy 50 | # example: todo 51 | # https://github.com/geekan/scrapy-examples 52 | # scrapy="*" 53 | 
-------------------------------------------------------------------------------- /example_asyncio.py: -------------------------------------------------------------------------------- 1 | """ 2 | Simple scraping script. 3 | """ 4 | import socket 5 | import json 6 | import traceback 7 | import asyncio 8 | import time 9 | import requests 10 | 11 | from webtools import ( 12 | Url, 13 | fetch_url, 14 | fetch_all_urls, 15 | WebConfig, 16 | HttpPageHandler, 17 | ) 18 | 19 | 20 | __version__ = "0.0.1" 21 | 22 | 23 | async def main(): 24 | WebConfig.use_print_logging() 25 | 26 | # TODO - there seems to be some problems with feedparser, when used with asyncio threads 27 | # module 'xml.sax.expatreader' has no attribute 'create_parser' 28 | 29 | links = [ 30 | "https://www.youtube.com/feeds/videos.xml?channel_id=UCXGgrKt94gR6lmN4aN3mYTg", 31 | "https://www.youtube.com/feeds/videos.xml?channel_id=UCyl5V3-J_Bsy3x-EBCJwepg", 32 | "https://www.youtube.com/feeds/videos.xml?channel_id=UCmrLCXSDScliR7q8AxxjvXg", 33 | "https://www.youtube.com/feeds/videos.xml?channel_id=UC7vVhkEfw4nOGp8TyDk7RcQ", 34 | "https://www.youtube.com/feeds/videos.xml?channel_id=UClozNP-QPyVatzpGKC25s0A", 35 | "https://www.youtube.com/feeds/videos.xml?channel_id=UCld68syR8Wi-GY_n4CaoJGA", 36 | "https://www.youtube.com/feeds/videos.xml?channel_id=UCROQqK3_z79JuTetNP3pIXQ", 37 | ] 38 | 39 | results = await fetch_all_urls(links) 40 | for result in results: 41 | response = result.get_response() 42 | print("{} {}".format(response.url, response.status_code)) 43 | 44 | 45 | if __name__ == "__main__": 46 | start_time = time.time() 47 | asyncio.run(main()) 48 | print(f"Done in {time.time() - start_time} seconds") 49 | -------------------------------------------------------------------------------- /examples/start_server.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | make run 4 | 
-------------------------------------------------------------------------------- /examples/startserver.bat: -------------------------------------------------------------------------------- 1 | poetry run python manage.py runserver 0.0.0.0:8080 2 | -------------------------------------------------------------------------------- /examples/test.bat: -------------------------------------------------------------------------------- 1 | poetry run python manage.py test rsshistory -v 2 2 | -------------------------------------------------------------------------------- /examples/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | make test 4 | -------------------------------------------------------------------------------- /images/django.kra: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/images/django.kra -------------------------------------------------------------------------------- /images/django.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/images/django.png -------------------------------------------------------------------------------- /images/django_400.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/images/django_400.png -------------------------------------------------------------------------------- /images/old-wild-west.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/images/old-wild-west.jpg 
-------------------------------------------------------------------------------- /images/old-wild-west.jpg~: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/images/old-wild-west.jpg~ -------------------------------------------------------------------------------- /images/old-wild-west.kra: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/images/old-wild-west.kra -------------------------------------------------------------------------------- /images/old-wild-west.kra~: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/images/old-wild-west.kra~ -------------------------------------------------------------------------------- /images/sunset-5511638_1280.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/images/sunset-5511638_1280.jpg -------------------------------------------------------------------------------- /init_browser_setup.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "crawler": "RequestsCrawler", 4 | "enabled": true, 5 | "name": "RequestsCrawler", 6 | "settings": { 7 | "timeout_s": 20 8 | } 9 | }, 10 | { 11 | "crawler": "ScriptCrawler", 12 | "enabled": false, 13 | "name": "CrawleeScript", 14 | "settings": { 15 | "script": "poetry run python crawleebeautifulsoup.py", 16 | "timeout_s": 40 17 | } 18 | }, 19 | { 20 | "crawler": "ScriptCrawler", 21 | "enabled": false, 22 | "name": "PlaywrightScript", 23 | "settings": { 24 | "script": "poetry run python 
crawleebeautifulsoup.py", 25 | "timeout_s": 40 26 | } 27 | }, 28 | { 29 | "crawler": "SeleniumUndetected", 30 | "enabled": false, 31 | "name": "SeleniumUndetected", 32 | "settings": { 33 | "driver_executable": "/usr/bin/chromedriver", 34 | "timeout_s": 40 35 | } 36 | }, 37 | { 38 | "crawler": "SeleniumBase", 39 | "enabled": false, 40 | "name": "SeleniumBase", 41 | "settings": {} 42 | }, 43 | { 44 | "crawler": "SeleniumChromeHeadless", 45 | "enabled": false, 46 | "name": "SeleniumChromeHeadless", 47 | "settings": { 48 | "driver_executable": "/usr/bin/chromedriver", 49 | "timeout_s": 40 50 | } 51 | }, 52 | { 53 | "crawler": "SeleniumChromeFull", 54 | "enabled": true, 55 | "name": "SeleniumChromeFull", 56 | "settings": { 57 | "driver_executable": "/usr/bin/chromedriver", 58 | "timeout_s": 40 59 | } 60 | }, 61 | { 62 | "crawler": "StealthRequestsCrawler", 63 | "enabled": false, 64 | "name": "StealthRequestsCrawler", 65 | "settings": { 66 | "timeout_s": 20 67 | } 68 | } 69 | ] 70 | -------------------------------------------------------------------------------- /linklibrary/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/linklibrary/__init__.py -------------------------------------------------------------------------------- /linklibrary/__init__celery.py: -------------------------------------------------------------------------------- 1 | # This will make sure the app is always imported when 2 | # Django starts so that shared_task will use this app. 3 | from .celery import app as celery_app 4 | __all__ = ("celery_app",) 5 | -------------------------------------------------------------------------------- /linklibrary/celery_example.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file should be updated to fit needs of your own project. 
3 | 4 | Provides means: 5 | - how to configure basic setup for threads 6 | - for more apps more code needs to be written (depends on your project) 7 | """ 8 | 9 | import os 10 | from contextlib import contextmanager 11 | 12 | from django.core.cache import cache 13 | 14 | from celery import Celery 15 | from celery.utils.log import get_task_logger 16 | import importlib.util 17 | import importlib 18 | import logging 19 | import time 20 | import threading 21 | 22 | # Set the default Django settings module for the 'celery' program. 23 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "linklibrary.settings") 24 | 25 | app = Celery("linklibrary") 26 | 27 | # Using a string here means the worker doesn't have to serialize 28 | # the configuration object to child processes. 29 | # - namespace='CELERY' means all celery-related configuration keys 30 | # should have a `CELERY_` prefix. 31 | app.config_from_object("django.conf:settings", namespace="CELERY") 32 | 33 | # Load task modules from all registered Django apps. 34 | app.autodiscover_tasks() 35 | 36 | 37 | logger = get_task_logger(__name__) 38 | -------------------------------------------------------------------------------- /linklibrary/urls_example.py: -------------------------------------------------------------------------------- 1 | """linklibrary URL Configuration 2 | 3 | The `urlpatterns` list routes URLs to views. For more information please see: 4 | https://docs.djangoproject.com/en/2.2/topics/http/urls/ 5 | Examples: 6 | Function views 7 | 1. Add an import: from my_app import views 8 | 2. Add a URL to urlpatterns: path('', views.home, name='home') 9 | Class-based views 10 | 1. Add an import: from other_app.views import Home 11 | 2. Add a URL to urlpatterns: path('', Home.as_view(), name='home') 12 | Including another URLconf 13 | 1. Import the include() function: from django.urls import include, path 14 | 2. 
Add a URL to urlpatterns: path('blog/', include('blog.urls')) 15 | """ 16 | from django.contrib import admin 17 | from django.urls import path 18 | 19 | from django.urls import include 20 | from django.views.generic import RedirectView 21 | from django.conf import settings 22 | from django.conf.urls.static import static 23 | 24 | urlpatterns = [ 25 | path("admin/", admin.site.urls, name="admin"), 26 | path("", RedirectView.as_view(url="rsshistory/")), 27 | path("rsshistory/", include("rsshistory.urls")), 28 | path("robots.txt", RedirectView.as_view(url="rsshistory/robots.txt")), 29 | path("opensearch.xml", RedirectView.as_view(url="rsshistory/opensearch.xml")), 30 | ] + static(settings.STATIC_URL, document_root=settings.STATIC_ROOT) 31 | -------------------------------------------------------------------------------- /linklibrary/wsgi.py: -------------------------------------------------------------------------------- 1 | """ 2 | WSGI config for linklibrary project. 3 | 4 | It exposes the WSGI callable as a module-level variable named ``application``. 5 | 6 | For more information on this file, see 7 | https://docs.djangoproject.com/en/2.2/howto/deployment/wsgi/ 8 | """ 9 | 10 | import os 11 | 12 | from django.core.wsgi import get_wsgi_application 13 | 14 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "linklibrary.settings") 15 | 16 | application = get_wsgi_application() 17 | -------------------------------------------------------------------------------- /manage.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """Django's command-line utility for administrative tasks.""" 3 | import os 4 | import sys 5 | 6 | 7 | def main(): 8 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "linklibrary.settings") 9 | try: 10 | from django.core.management import execute_from_command_line 11 | except ImportError as exc: 12 | raise ImportError( 13 | "Couldn't import Django. 
Are you sure it's installed and " 14 | "available on your PYTHONPATH environment variable? Did you " 15 | "forget to activate a virtual environment?" 16 | ) from exc 17 | execute_from_command_line(sys.argv) 18 | 19 | 20 | if __name__ == "__main__": 21 | main() 22 | -------------------------------------------------------------------------------- /page_props.py: -------------------------------------------------------------------------------- 1 | """ 2 | Simple scraping script. 3 | """ 4 | import socket 5 | import json 6 | import traceback 7 | import asyncio 8 | import time 9 | import requests 10 | 11 | from rsshistory.webtools import ( 12 | Url, 13 | RssPage, 14 | HtmlPage, 15 | fetch_url, 16 | fetch_all_urls, 17 | WebConfig, 18 | ) 19 | from utils.serializers import PageDisplay, PageDisplayParser 20 | 21 | 22 | __version__ = "0.0.1" 23 | 24 | 25 | async def main(): 26 | WebConfig.init() 27 | # we do not want to be swamped with web requests 28 | # WebConfig.use_print_logging() 29 | 30 | parser = PageDisplayParser() 31 | parser.parse() 32 | 33 | if not parser.args.url: 34 | parser.parser.print_help() 35 | else: 36 | display = PageDisplay(parser.args.url, parser) 37 | 38 | 39 | if __name__ == "__main__": 40 | start_time = time.time() 41 | asyncio.run(main()) 42 | print(f"Done in {time.time() - start_time} seconds") 43 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "linklibrary" 3 | version = "2.21.1" 4 | description = "Link Archive" 5 | authors = ["Iwan Grozny "] 6 | license = "GPL3" 7 | readme = "README.md" 8 | 9 | [tool.poetry.dependencies] 10 | python = "^3.10" 11 | Django = "^5.2.0" 12 | django-user-agents = "^0.4.0" 13 | pymemcache = "^4.0.0" 14 | python-dateutil = "^2.8.2" 15 | yt-dlp = "^2025.9.5" 16 | waybackpy = "^3.0.6" 17 | sqlalchemy = "^2.0.38" 18 | tldextract = "^5.1.2" 19 | sympy = "^1.14.0" 20 
| spacy = "3.7.5" 21 | url-cleaner="0.1.5" 22 | 23 | # Do we need this? 24 | pytz = "^2024.2" 25 | # Do we need this? 26 | beautifulsoup4 = "^4.13.3" 27 | 28 | requests = "^2.32.3" 29 | #psycopg2 = "*" # for raspberry install python3-psycopg2 30 | psycopg2-binary = "*" # for raspberry install python3-psycopg2 31 | psutil="*" 32 | 33 | # TODO do we need this? 34 | chardet = "^5.2.0" 35 | # TODO do we need this? 36 | lxml="^5.4.0" 37 | brutefeedparser="*" 38 | webtoolkit="0.0.6" 39 | cryptography="^45.0.3" 40 | 41 | [tool.poetry.group.dev.dependencies] 42 | black = "^24.10.0" 43 | py-spy = "^0.4.0" 44 | 45 | [build-system] 46 | requires = ["poetry-core"] 47 | build-backend = "poetry.core.masonry.api" 48 | -------------------------------------------------------------------------------- /rabbitmq.conf: -------------------------------------------------------------------------------- 1 | vm_memory_high_watermark.relative = 0.9 2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | Django >= "4.2.17" 2 | django-user-agents >= "0.4.0" 3 | django-celery-results >= "2.5.1" 4 | pymemcache >= "4.0.0" 5 | celery >= "5.4.0" 6 | feedparser >= "6.0.10" 7 | python-dateutil >= "2.8.2" 8 | yt-dlp >= "2024.12.6" 9 | waybackpy >= "3.0.6" 10 | sqlalchemy >= "2.0.34" 11 | tldextract >= "5.1.2" 12 | beautifulsoup4 >= "4.12.3" 13 | sympy >= "1.13.2" 14 | spacy >= "3.7.5" 15 | pytz >= "2024.2" 16 | 17 | # [optional] - at least one of these methods needs to be running 18 | # example: crawlerrequests.py 19 | requests >= "2.32.3" 20 | 21 | # [optional] 22 | stealth_requests = "*" 23 | 24 | # [optional] use things below if you want to use selenium 25 | # example: crawlerseleniumfull.py, crawlerseleniumheadless.py 26 | selenium>="4.27.1" 27 | chardet >= "5.2.0" 28 | psycopg2 29 | pyvirtualdisplay >= "3.0" 30 | 31 | # [optional] use if you plan to use it 32 | # Note: 
does not work on raspberry pi 33 | # example: crawlerseleniumundetected.py 34 | undetected-chromedriver >= "3.5.5" 35 | 36 | # [optional] use things below if you want to use crawlee 37 | # example: crawleebeautifulsoup.py, crawleeplaywright.py 38 | crawlee>="0.4.5" 39 | playwright>="1.49.1" # 1.18.1 40 | lxml>="5.2.2" 41 | 42 | # [optional] use things below if you want to use seleniumbase 43 | # example: crawlerseleniumbase.py 44 | # seleniumbase="^4.30.3" 45 | 46 | # [optional] use things below if you want to use botasaurus 47 | # example: todo 48 | botasaurus-driver 49 | # botasaurus="*" 50 | 51 | # [optional] use things below if you want to use scrapy 52 | # example: todo 53 | # https://github.com/geekan/scrapy-examples 54 | # scrapy="*" 55 | -------------------------------------------------------------------------------- /rsshistory/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/rsshistory/__init__.py -------------------------------------------------------------------------------- /rsshistory/admin.py: -------------------------------------------------------------------------------- 1 | from django.contrib import admin 2 | 3 | # Register your models here. 
4 | -------------------------------------------------------------------------------- /rsshistory/apps.py: -------------------------------------------------------------------------------- 1 | # import atexit 2 | from django.apps import AppConfig 3 | 4 | 5 | def cleanup_on_exit(): 6 | # Your cleanup code here 7 | # print("App Cleanup") 8 | pass 9 | 10 | 11 | class LinkDatabase(AppConfig): 12 | name = "rsshistory" 13 | verbose_name = "Personal link database" 14 | 15 | def ready(self): 16 | # print("App Ready {}".format(LinkDatabase.name)) 17 | # atexit.register(cleanup_on_exit) 18 | pass 19 | 20 | def info(message): 21 | print("[{}] {}".format(LinkDatabase.name, message)) 22 | 23 | def error(message): 24 | print("[{}] {}".format(LinkDatabase.name, message)) 25 | -------------------------------------------------------------------------------- /rsshistory/controllers/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mostly controllers for models 3 | """ 4 | 5 | from .sources import ( 6 | SourceDataController, 7 | SourceDataBuilder, 8 | ) 9 | 10 | from .domains import ( 11 | DomainsController, 12 | ) 13 | 14 | from .backgroundjob import ( 15 | BackgroundJobController, 16 | ) 17 | 18 | from .comments import ( 19 | UserCommentsController, 20 | ) 21 | 22 | from .entries import ( 23 | LinkDataController, 24 | ArchiveLinkDataController, 25 | ) 26 | from .entriesutils import ( 27 | EntryContentsCrawler, 28 | EntryPageCrawler, 29 | ) 30 | from .entrywrapper import EntryWrapper 31 | from .entrycleanup import EntriesCleanupAndUpdate, EntriesCleanup 32 | from .entryupdater import EntryUpdater, EntriesUpdater 33 | from .entrydatabuilder import EntryDataBuilder 34 | 35 | from .modelfiles import ( 36 | ModelFilesBuilder, 37 | ) 38 | from .system import ( 39 | SystemOperationController, 40 | ) 41 | 42 | from .searchengines import SearchEngines, SearchEngineGoogle, SearchEngineGoogleCache 43 | 44 | from .wizards import ( 45 | 
system_setup_for_news, 46 | system_setup_for_gallery, 47 | system_setup_for_search_engine, 48 | common_initialize_entry_rules, 49 | ) 50 | -------------------------------------------------------------------------------- /rsshistory/controllers/modelfiles.py: -------------------------------------------------------------------------------- 1 | import base64 2 | from ..webtools import Url 3 | 4 | from ..models import ModelFiles 5 | from ..configuration import Configuration 6 | 7 | 8 | class ModelFilesBuilder(object): 9 | def __init__(self): 10 | pass 11 | 12 | def build(self, file_name=None): 13 | from ..pluginurl import UrlHandlerEx 14 | 15 | if file_name is None: 16 | return 17 | if file_name == "": 18 | return 19 | 20 | c = Configuration.get_object().config_entry 21 | if not c.enabled_file_support: 22 | return 23 | 24 | p = UrlHandlerEx(url=file_name) 25 | binary_data = p.get_section("Binary") 26 | if not binary_data: 27 | # consume 28 | return True 29 | 30 | if binary_data: 31 | raw_data = binary_data["Contents"] 32 | decoded_bytes = base64.b64decode(raw_data) 33 | 34 | if not ModelFiles.objects.filter(file_name=file_name).exists(): 35 | ModelFiles.add(file_name, decoded_bytes) 36 | -------------------------------------------------------------------------------- /rsshistory/management/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/rsshistory/management/__init__.py -------------------------------------------------------------------------------- /rsshistory/management/commands/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/rsshistory/management/commands/__init__.py -------------------------------------------------------------------------------- 
/rsshistory/models/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Here only models should be. 3 | 4 | When a model has too much code, some of it is moved to "controllers" 5 | """ 6 | 7 | from .system import ( 8 | ConfigurationEntry, 9 | SystemOperation, 10 | UserConfig, 11 | AppLogging, 12 | AppLoggingController, 13 | ) 14 | from .backgroundjob import ( 15 | BackgroundJob, 16 | BackgroundJobHistory, 17 | ) 18 | from .entries import ( 19 | LinkDataModel, 20 | ArchiveLinkDataModel, 21 | BaseLinkDataModel, 22 | BaseLinkDataController, 23 | ) 24 | from .sources import ( 25 | SourceDataModel, 26 | SourceOperationalData, 27 | SourceCategories, 28 | SourceSubCategories, 29 | ) 30 | from .export import ( 31 | SourceExportHistory, 32 | DataExport, 33 | ) 34 | from .domains import ( 35 | Domains, 36 | DomainsSuffixes, 37 | DomainsTlds, 38 | DomainsMains, 39 | ) 40 | from .keywords import ( 41 | KeyWords, 42 | ) 43 | from .entryrules import ( 44 | EntryRules, 45 | ) 46 | from .apikeys import ( 47 | ApiKeys, 48 | ) 49 | from .credentials import ( 50 | Credentials, 51 | ) 52 | from .useractions import ( 53 | UserVotes, 54 | UserComments, 55 | UserBookmarks, 56 | UserTags, 57 | UserCompactedTags, 58 | CompactedTags, 59 | EntryCompactedTags, 60 | ) 61 | from .userhistory import ( 62 | UserSearchHistory, 63 | UserEntryVisitHistory, 64 | UserEntryTransitionHistory, 65 | ) 66 | from .modelfiles import ModelFiles 67 | 68 | from .readmarkers import ReadMarkers 69 | from .readlater import ReadLater 70 | from .browser import Browser 71 | 72 | from .blockentry import BlockEntryList, BlockEntry, BlockListReader 73 | 74 | from .gateway import Gateway 75 | 76 | from .searchview import SearchView 77 | from .social import SocialData 78 | -------------------------------------------------------------------------------- /rsshistory/models/apikeys.py: -------------------------------------------------------------------------------- 1 | 
class ApiKeys(models.Model):
    """
    A stored key string, optionally linked to a user account.
    """

    key = models.CharField(null=False, max_length=1000)

    # Rows are removed together with the referenced user (CASCADE);
    # the user may also be left unset.
    user = models.ForeignKey(
        settings.AUTH_USER_MODEL,
        on_delete=models.CASCADE,
        related_name="{}_apikeys".format(LinkDatabase.name),
        null=True,
    )

    class Meta:
        ordering = ["user"]
class ModelFiles(models.Model):
    """
    Binary file contents stored in the database, keyed by file name.

    The table is capped: each time a new row is created, the oldest
    rows beyond `get_max_files()` are removed.
    """

    file_name = models.CharField(max_length=2000, unique=True)
    contents = models.BinaryField(max_length=1000000, null=True)  # 1MB max
    date_created = models.DateTimeField(
        auto_now_add=True,
        null=True,
        help_text="Date when entry was created in the database",
    )

    class Meta:
        ordering = ["file_name"]

    @staticmethod
    def add(file_name, contents):
        """
        Store `contents` under `file_name`, replacing existing contents.

        When file support is disabled in the configuration, every stored
        file is deleted instead and nothing is added.
        """
        from ..configuration import Configuration

        config_entry = Configuration.get_object().config_entry

        if not config_entry.enable_file_support:
            ModelFiles.objects.all().delete()
            return

        existing = ModelFiles.objects.filter(file_name=file_name).first()
        if existing is not None:
            existing.contents = contents
            existing.save()
        else:
            ModelFiles.objects.create(file_name=file_name, contents=contents)
            # Only a newly created row can push the table over the limit.
            ModelFiles.cleanup()

    @staticmethod
    def cleanup(cfg=None):
        """
        Delete the oldest rows so that at most `get_max_files()` remain.

        `cfg` is kept for interface compatibility and is not used.
        """
        max_files = ModelFiles.get_max_files()

        oldest_first = ModelFiles.objects.all().order_by("date_created")
        excess = oldest_first.count() - max_files
        if excess <= 0:
            return

        # Collect the ids first. Indexing the lazy queryset inside a
        # delete loop re-runs the query with a growing offset against a
        # shrinking table, which skips rows and removes the wrong ones.
        stale_ids = list(oldest_first.values_list("id", flat=True)[:excess])
        ModelFiles.objects.filter(id__in=stale_ids).delete()

    @staticmethod
    def get_max_files():
        """Return the maximum number of rows kept by `cleanup`."""
        return 200

    def get_url(self):
        """Return the URL of the "model-file" view for this row."""
        return reverse("{}:model-file".format(LinkDatabase.name), args=[str(self.id)])

    def get_size_bytes(self):
        """Return the length of `contents`, or 0 when there are none."""
        if not self.contents:
            return 0

        return len(self.contents)
class CodeProjectPlugin(BaseRssPlugin):
    """
    RSS plugin that takes the entry link from the feed entry's `source`
    element when one is available.
    """

    PLUGIN_NAME = "CodeProjectPlugin"

    def __init__(self, source_id):
        super().__init__(source_id)
        self.allow_adding_with_current_time = True

    def enhance(self, props):
        """
        TODO unused?

        Replace props["link"] with the link from the entry's `source`
        element, falling back to the entry's own link, and strip one
        trailing slash. Returns the updated props.
        """
        feed_entry = props["feed_entry"]

        props = super().enhance(props)

        entry_source = feed_entry.source
        if "href" in entry_source:
            if entry_source["href"]:
                props["link"] = entry_source["href"]
                if props["link"].strip() == "":
                    props["link"] = feed_entry.link
            # The original re-tested "href" here, which can never be true
            # after the branch above failed, so the "url" fallback was
            # unreachable.
            elif "url" in entry_source and entry_source["url"]:
                props["link"] = entry_source["url"]
                if props["link"].strip() == "":
                    props["link"] = feed_entry.link
            else:
                AppLogging.error("Could not find source/url/href in RSS entry")
        else:
            props["link"] = feed_entry.link

        if props["link"].endswith("/"):
            props["link"] = props["link"][:-1]

        return props
class SourceControllerBuilder(object):
    """
    Selects the plugin class whose PLUGIN_NAME matches a source's
    `source_type`.
    """

    plugins = [
        BaseRssPlugin,
        BaseParsePlugin,
        BaseSourceJsonPlugin,
        EmailSourcePlugin,
        # domain specific
        CodeProjectPlugin,
        TVN24Plugin,
        SpotifyPlugin,
        HackerNewsParserPlugin,
    ]

    @staticmethod
    def get(source_id):
        """
        Return a plugin instance for the source with id `source_id`.

        Returns None when the source does not exist. Also returns None
        when no plugin matches the source type; in that case the source
        is reset to "BaseRssPlugin" and saved.
        """
        from ..models import SourceDataModel

        source = SourceDataModel.objects.filter(id=source_id).first()
        if source is None:
            return None

        # database operations should be short lived. we do not pass source object.

        # Compare against the class attribute (as get_plugin_names does)
        # and construct only the plugin that matches, instead of
        # instantiating every plugin just to read its name.
        for plugin_def in SourceControllerBuilder.plugins:
            if source.source_type == plugin_def.PLUGIN_NAME:
                return plugin_def(source_id)

        AppLogging.notify(
            "Incorrectly configured source, ID:{} title:{}, type:{}. Setting it to base RSS type".format(
                source.id, source.title, source.source_type
            )
        )

        source.source_type = "BaseRssPlugin"
        source.save()
        # As before, nothing is returned for this call.
        return None

    @staticmethod
    def get_plugin_names():
        """Return the distinct PLUGIN_NAME values of the registered plugins."""
        return list({plugin_def.PLUGIN_NAME for plugin_def in SourceControllerBuilder.plugins})
class SpotifyPlugin(BaseParsePlugin):
    """
    Parse plugin that accepts only links containing "<domain>/episode",
    where the domain comes from the source address.
    """

    PLUGIN_NAME = "SpotifyPlugin"

    def __init__(self, source_id):
        super().__init__(source_id)

    def is_link_valid(self, address):
        """
        Return True when `address` is within the source's domain and
        contains "<domain>/episode"; False otherwise.
        """
        location = UrlLocation(self.get_address())

        if not location.is_link_in_domain(address):
            return False

        # Plain substring test. The domain used to be passed to
        # re.search() unescaped, so every "." in it matched any
        # character instead of a literal dot.
        return (location.get_domain() + "/episode") in address
class RepositoryFactory(object):
    """
    Maps a data export definition to the repository class for its type.
    """

    @staticmethod
    def get(export_data):
        """
        Return the repository class (not an instance) for
        `export_data.export_type`.

        Raises NotImplementedError for any other export type.
        """
        # Declared static: the function takes no `self`, so calling it
        # on an instance used to fail.
        if export_data.export_type == DataExport.EXPORT_TYPE_GIT:
            return GitRepository

        if export_data.export_type == DataExport.EXPORT_TYPE_LOC:
            return RepositoryInterface

        raise NotImplementedError("Not implemented export type")
class DomainJsonExporter(object):
    """
    Serializes domain objects through their `get_map()` method.
    """

    def get_json(self, domains):
        """Return {"domains": [...]} with one `get_map()` result per domain."""
        # JsonResponse
        return {"domains": [item.get_map() for item in domains]}

    def get_text(self, domains):
        """Return the `get_json` structure encoded as a JSON string."""
        return json.dumps(self.get_json(domains))
class MarginaliaCrawlerOutput(object):
    """
    Extracts links from the text of a Marginalia 'crawler.log' file.
    """

    def __init__(self, contents):
        self.contents = contents

    def get_links(self):
        """
        Return the leading token of every non-blank line, in order.

        Lines for which `get_processed_link` yields nothing are skipped.
        """
        stripped_lines = (raw.strip() for raw in self.contents.split("\n"))
        candidates = (
            self.get_processed_link(entry) for entry in stripped_lines if entry
        )
        return [found for found in candidates if found]

    def get_processed_link(self, line):
        """
        Return the text before the first space in `line`.

        Returns None when the line contains no space.
        """
        head, separator, _rest = line.partition(" ")
        if not separator:
            return None
        return head
https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/rsshistory/static/rsshistory/icons/archive.org.ico -------------------------------------------------------------------------------- /rsshistory/static/rsshistory/icons/archive.org.save.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/rsshistory/static/rsshistory/icons/archive.org.save.ico -------------------------------------------------------------------------------- /rsshistory/static/rsshistory/icons/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/rsshistory/static/rsshistory/icons/favicon.ico -------------------------------------------------------------------------------- /rsshistory/static/rsshistory/icons/icons8-add-link-96.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/rsshistory/static/rsshistory/icons/icons8-add-link-96.png -------------------------------------------------------------------------------- /rsshistory/static/rsshistory/icons/icons8-back-100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/rsshistory/static/rsshistory/icons/icons8-back-100.png -------------------------------------------------------------------------------- /rsshistory/static/rsshistory/icons/icons8-bookmark-100.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/rsshistory/static/rsshistory/icons/icons8-bookmark-100.png -------------------------------------------------------------------------------- /rsshistory/static/rsshistory/icons/icons8-bookmarks-100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/rsshistory/static/rsshistory/icons/icons8-bookmarks-100.png -------------------------------------------------------------------------------- /rsshistory/static/rsshistory/icons/icons8-broadcast-100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/rsshistory/static/rsshistory/icons/icons8-broadcast-100.png -------------------------------------------------------------------------------- /rsshistory/static/rsshistory/icons/icons8-broadcast-add-100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/rsshistory/static/rsshistory/icons/icons8-broadcast-add-100.png -------------------------------------------------------------------------------- /rsshistory/static/rsshistory/icons/icons8-channel-add-96.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/rsshistory/static/rsshistory/icons/icons8-channel-add-96.png -------------------------------------------------------------------------------- /rsshistory/static/rsshistory/icons/icons8-configuration-67.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/rsshistory/static/rsshistory/icons/icons8-configuration-67.png -------------------------------------------------------------------------------- /rsshistory/static/rsshistory/icons/icons8-database-export-100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/rsshistory/static/rsshistory/icons/icons8-database-export-100.png -------------------------------------------------------------------------------- /rsshistory/static/rsshistory/icons/icons8-down-100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/rsshistory/static/rsshistory/icons/icons8-down-100.png -------------------------------------------------------------------------------- /rsshistory/static/rsshistory/icons/icons8-download-96.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/rsshistory/static/rsshistory/icons/icons8-download-96.png -------------------------------------------------------------------------------- /rsshistory/static/rsshistory/icons/icons8-download-music-96.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/rsshistory/static/rsshistory/icons/icons8-download-music-96.png -------------------------------------------------------------------------------- /rsshistory/static/rsshistory/icons/icons8-download-page-96.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/rsshistory/static/rsshistory/icons/icons8-download-page-96.png -------------------------------------------------------------------------------- /rsshistory/static/rsshistory/icons/icons8-download-video-96.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/rsshistory/static/rsshistory/icons/icons8-download-video-96.png -------------------------------------------------------------------------------- /rsshistory/static/rsshistory/icons/icons8-drop-down-100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/rsshistory/static/rsshistory/icons/icons8-drop-down-100.png -------------------------------------------------------------------------------- /rsshistory/static/rsshistory/icons/icons8-edit-100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/rsshistory/static/rsshistory/icons/icons8-edit-100.png -------------------------------------------------------------------------------- /rsshistory/static/rsshistory/icons/icons8-external-link-128.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/rsshistory/static/rsshistory/icons/icons8-external-link-128.png -------------------------------------------------------------------------------- /rsshistory/static/rsshistory/icons/icons8-ftp-96.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/rsshistory/static/rsshistory/icons/icons8-ftp-96.png -------------------------------------------------------------------------------- /rsshistory/static/rsshistory/icons/icons8-heart-monitor-100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/rsshistory/static/rsshistory/icons/icons8-heart-monitor-100.png -------------------------------------------------------------------------------- /rsshistory/static/rsshistory/icons/icons8-hide-96.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/rsshistory/static/rsshistory/icons/icons8-hide-96.png -------------------------------------------------------------------------------- /rsshistory/static/rsshistory/icons/icons8-home-96.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/rsshistory/static/rsshistory/icons/icons8-home-96.png -------------------------------------------------------------------------------- /rsshistory/static/rsshistory/icons/icons8-letters-96.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/rsshistory/static/rsshistory/icons/icons8-letters-96.png -------------------------------------------------------------------------------- /rsshistory/static/rsshistory/icons/icons8-link-90.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/rsshistory/static/rsshistory/icons/icons8-link-90.png -------------------------------------------------------------------------------- /rsshistory/static/rsshistory/icons/icons8-locked-100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/rsshistory/static/rsshistory/icons/icons8-locked-100.png -------------------------------------------------------------------------------- /rsshistory/static/rsshistory/icons/icons8-login-100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/rsshistory/static/rsshistory/icons/icons8-login-100.png -------------------------------------------------------------------------------- /rsshistory/static/rsshistory/icons/icons8-logout-100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/rsshistory/static/rsshistory/icons/icons8-logout-100.png -------------------------------------------------------------------------------- /rsshistory/static/rsshistory/icons/icons8-music-96.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/rsshistory/static/rsshistory/icons/icons8-music-96.png -------------------------------------------------------------------------------- /rsshistory/static/rsshistory/icons/icons8-nas-96.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/rsshistory/static/rsshistory/icons/icons8-nas-96.png -------------------------------------------------------------------------------- /rsshistory/static/rsshistory/icons/icons8-new-100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/rsshistory/static/rsshistory/icons/icons8-new-100.png -------------------------------------------------------------------------------- /rsshistory/static/rsshistory/icons/icons8-not-bookmark-100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/rsshistory/static/rsshistory/icons/icons8-not-bookmark-100.png -------------------------------------------------------------------------------- /rsshistory/static/rsshistory/icons/icons8-paper-64.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/rsshistory/static/rsshistory/icons/icons8-paper-64.png -------------------------------------------------------------------------------- /rsshistory/static/rsshistory/icons/icons8-pause-squared-100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/rsshistory/static/rsshistory/icons/icons8-pause-squared-100.png -------------------------------------------------------------------------------- /rsshistory/static/rsshistory/icons/icons8-play-100.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/rsshistory/static/rsshistory/icons/icons8-play-100.png -------------------------------------------------------------------------------- /rsshistory/static/rsshistory/icons/icons8-plus-100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/rsshistory/static/rsshistory/icons/icons8-plus-100.png -------------------------------------------------------------------------------- /rsshistory/static/rsshistory/icons/icons8-radar-64.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/rsshistory/static/rsshistory/icons/icons8-radar-64.png -------------------------------------------------------------------------------- /rsshistory/static/rsshistory/icons/icons8-rate-100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/rsshistory/static/rsshistory/icons/icons8-rate-100.png -------------------------------------------------------------------------------- /rsshistory/static/rsshistory/icons/icons8-save-100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/rsshistory/static/rsshistory/icons/icons8-save-100.png -------------------------------------------------------------------------------- /rsshistory/static/rsshistory/icons/icons8-schedule-100.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/rsshistory/static/rsshistory/icons/icons8-schedule-100.png -------------------------------------------------------------------------------- /rsshistory/static/rsshistory/icons/icons8-search-100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/rsshistory/static/rsshistory/icons/icons8-search-100.png -------------------------------------------------------------------------------- /rsshistory/static/rsshistory/icons/icons8-show-100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/rsshistory/static/rsshistory/icons/icons8-show-100.png -------------------------------------------------------------------------------- /rsshistory/static/rsshistory/icons/icons8-skull-100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/rsshistory/static/rsshistory/icons/icons8-skull-100.png -------------------------------------------------------------------------------- /rsshistory/static/rsshistory/icons/icons8-start-100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/rsshistory/static/rsshistory/icons/icons8-start-100.png -------------------------------------------------------------------------------- /rsshistory/static/rsshistory/icons/icons8-stop-100.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/rsshistory/static/rsshistory/icons/icons8-stop-100.png -------------------------------------------------------------------------------- /rsshistory/static/rsshistory/icons/icons8-tags-100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/rsshistory/static/rsshistory/icons/icons8-tags-100.png -------------------------------------------------------------------------------- /rsshistory/static/rsshistory/icons/icons8-translate-128.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/rsshistory/static/rsshistory/icons/icons8-translate-128.png -------------------------------------------------------------------------------- /rsshistory/static/rsshistory/icons/icons8-trash-100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/rsshistory/static/rsshistory/icons/icons8-trash-100.png -------------------------------------------------------------------------------- /rsshistory/static/rsshistory/icons/icons8-trash-multiple-100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/rsshistory/static/rsshistory/icons/icons8-trash-multiple-100.png -------------------------------------------------------------------------------- /rsshistory/static/rsshistory/icons/icons8-tv-show-32.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/rsshistory/static/rsshistory/icons/icons8-tv-show-32.png -------------------------------------------------------------------------------- /rsshistory/static/rsshistory/icons/icons8-unlocked-96.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/rsshistory/static/rsshistory/icons/icons8-unlocked-96.png -------------------------------------------------------------------------------- /rsshistory/static/rsshistory/icons/icons8-up-100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/rsshistory/static/rsshistory/icons/icons8-up-100.png -------------------------------------------------------------------------------- /rsshistory/static/rsshistory/icons/icons8-update-100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/rsshistory/static/rsshistory/icons/icons8-update-100.png -------------------------------------------------------------------------------- /rsshistory/static/rsshistory/icons/icons8-update-skull-100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/rsshistory/static/rsshistory/icons/icons8-update-skull-100.png -------------------------------------------------------------------------------- /rsshistory/static/rsshistory/icons/icons8-video-96.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/rsshistory/static/rsshistory/icons/icons8-video-96.png -------------------------------------------------------------------------------- /rsshistory/static/rsshistory/icons/icons8-view-details-100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/rsshistory/static/rsshistory/icons/icons8-view-details-100.png -------------------------------------------------------------------------------- /rsshistory/static/rsshistory/icons/icons8-www-64.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/rsshistory/static/rsshistory/icons/icons8-www-64.png -------------------------------------------------------------------------------- /rsshistory/static/rsshistory/icons/icons8-youtube-music-96.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/rsshistory/static/rsshistory/icons/icons8-youtube-music-96.png -------------------------------------------------------------------------------- /rsshistory/static/rsshistory/icons/rss.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/rsshistory/static/rsshistory/icons/rss.gif -------------------------------------------------------------------------------- /rsshistory/static/rsshistory/images/piesel.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/rsshistory/static/rsshistory/images/piesel.jpg -------------------------------------------------------------------------------- /rsshistory/static/rsshistory/images/sign-304093_640.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/rsshistory/static/rsshistory/images/sign-304093_640.png -------------------------------------------------------------------------------- /rsshistory/static/rsshistory/test/test.sh: -------------------------------------------------------------------------------- 1 | node test.entries.js 2 | -------------------------------------------------------------------------------- /rsshistory/tasks.py: -------------------------------------------------------------------------------- 1 | import traceback 2 | import gc 3 | 4 | from .apps import LinkDatabase 5 | from .models import AppLogging 6 | from .configuration import Configuration, ConfigurationEntry 7 | 8 | 9 | def process_jobs_task(Processor, tasks_info): 10 | """! 11 | Processes all jobs for task 12 | """ 13 | c = Configuration.get_object() 14 | if not c.config_entry.enable_background_jobs: 15 | return 16 | 17 | c.config_entry = ConfigurationEntry.get() 18 | 19 | handler = Processor(tasks_info=tasks_info) 20 | 21 | handler.run() 22 | 23 | more_jobs = handler.is_more_jobs() 24 | gc.collect() 25 | return more_jobs 26 | 27 | 28 | def process_job_task(Processor, tasks_info): 29 | """! 
30 | Processes one job for task 31 | """ 32 | c = Configuration.get_object() 33 | if not c.config_entry.enable_background_jobs: 34 | return 35 | 36 | c.config_entry = ConfigurationEntry.get() 37 | 38 | handler = Processor(tasks_info=tasks_info) 39 | 40 | status = handler.run_one_job() 41 | more_jobs = handler.is_more_jobs() 42 | gc.collect() 43 | return more_jobs 44 | -------------------------------------------------------------------------------- /rsshistory/templates/auth/user_list.html: -------------------------------------------------------------------------------- 1 | {% extends base_generic %} 2 | {% load static %} 3 | {% block content %} 4 | 5 | {% for object in content_list %} 6 |
7 | User ID:{{object.id}} 8 | User name:{{object.username}} 9 | Password:password 10 | e-mail:{{object.email}} 11 | Staff:{{object.is_staff}} 12 | Active:{{object.is_active}} 13 | Last login:{{object.last_login}} 14 | Date joined:{{object.date_joined}} 15 |
16 | 17 | History 18 |
19 |
20 | {% endfor %} 21 | 22 | {% endblock %} 23 | -------------------------------------------------------------------------------- /rsshistory/templates/registration/login.html: -------------------------------------------------------------------------------- 1 | {% extends 'rsshistory/base_generic.html' %} 2 | {% load static %} 3 | {% block content %} 4 | 5 |

Log In

6 | 7 |
8 | {% csrf_token %} 9 | {% for field in form %} 10 |
11 | 12 | {{field }} 13 |
14 | {% if field.help_text %} 15 | 16 | {{field.help_text | safe }} 17 | 18 | {% endif %} 19 | {% endfor %} 20 | 21 |
22 | 23 | {% if form.errors.items %} 24 |
    25 | {% for field, errors in form.errors.items %} 26 | {% for error in errors %} 27 |
  • {{ field|title }}: {{ error }}
  • 28 | {% endfor %} 29 | {% endfor %} 30 |
31 | {% endif %} 32 | 33 | Register by making an e-mail request. 34 | 35 | {% endblock %} 36 | -------------------------------------------------------------------------------- /rsshistory/templates/registration/logout.html: -------------------------------------------------------------------------------- 1 | 2 | logged out 3 | -------------------------------------------------------------------------------- /rsshistory/templates/robots.txt: -------------------------------------------------------------------------------- 1 | # robots.txt is stupid https://wiki.archiveteam.org/index.php?title=Robots.txt 2 | # [Sarcasm] let's write URLs that people should not look at. Pretty please! 3 | 4 | User-Agent: * 5 | Disallow: /private/ 6 | Disallow: /junk/ 7 | 8 | # do not allow bots to slow us down! 9 | 10 | User-agent: AdsBot-Google 11 | Disallow: / 12 | 13 | User-agent: Googlebot 14 | Disallow: /apps/ 15 | 16 | User-agent: Google-Extended 17 | Disallow: /apps/ 18 | 19 | User-agent: Amazonbot 20 | Disallow: / 21 | 22 | User-agent: anthropic-ai 23 | Disallow: / 24 | 25 | User-agent: CCBot 26 | Disallow: / 27 | 28 | User-agent: ChatGPT-User 29 | Disallow: / 30 | 31 | User-agent: cohere-ai 32 | Disallow: / 33 | 34 | User-agent: FacebookBot 35 | Disallow: / 36 | 37 | User-agent: GPTBot 38 | Disallow: / 39 | 40 | User-agent: SemrushBot 41 | Disallow: / 42 | 43 | User-agent: meta-externalagent 44 | Disallow: / 45 | 46 | Disallow: /harming/humans 47 | Disallow: /ignoring/human/orders 48 | Disallow: /harm/to/self 49 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/about.html: -------------------------------------------------------------------------------- 1 | {% extends base_generic %} 2 | {% block content %} 3 | 4 | {% include "rsshistory/about_element.html" %} 5 | 6 | {% endblock %} 7 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/apikeys_list.html:
-------------------------------------------------------------------------------- 1 | {% extends base_generic %} 2 | {% load static %} 3 | {% block content %} 4 | 5 | 10 | 11 |

API keys

12 | {% if content_list %} 13 |
14 | {% for apikey_object in content_list %} 15 |
16 |
17 | Key: {{apikey_object.key}} 18 | 19 | {% with user=apikey_object.user %} 20 | {{user.id}} 21 | {{user.username}} 22 | {% endwith %} 23 |
24 | 25 |
26 | 27 | 28 | 29 |
30 |
31 | {% endfor %} 32 | {% else %} 33 | No keys yet 34 | {% endif %} 35 | 36 | {% endblock %} 37 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/appusers.html: -------------------------------------------------------------------------------- 1 | {% extends base_generic %} 2 | {% load static %} 3 | {% block content %} 4 | 5 | 13 | 14 | 15 | {% for user in content_list %} 16 |
17 | User ID:{{user.id}} 18 | User name:{{user.username}} 19 | Password:password 20 | e-mail:{{user.email}} 21 | Staff:{{user.is_staff}} 22 | Active:{{user.is_active}} 23 | Last login:{{user.last_login}} 24 | Date joined:{{user.date_joined}} 25 | 31 |
32 | {% endfor %} 33 | 34 | {% endblock %} 35 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/backgroundjob_list.html: -------------------------------------------------------------------------------- 1 | {% extends base_generic %} 2 | {% load static %} 3 | {% block content %} 4 | 5 |
6 | {% include "rsshistory/backgroundjob_list_actions.html" %} 7 |
8 | 9 |

Jobs

10 | 11 | 12 | 13 | 14 | 15 | Loading... 16 | 17 | 18 | 20 | 21 | 24 | 25 | {% endblock %} 26 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/base_footer.html: -------------------------------------------------------------------------------- 1 | {% load static %} 2 | 3 | 4 | 22 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/base_generic.html: -------------------------------------------------------------------------------- 1 | 2 | 7 | 8 | {% include "rsshistory/base_head.html" %} 9 | 10 | 11 | 12 | {% include "rsshistory/base_menu.html" %} 13 | 14 | 15 |
16 | {% block content %}{% endblock %} 17 |
18 | 19 |
20 | {% include "rsshistory/base_footer.html" %} 21 |
22 | 23 | 24 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/blockentry_list.html: -------------------------------------------------------------------------------- 1 | {% extends base_generic %} 2 | {% load static %} 3 | {% block content %} 4 | 5 | 13 | 14 |

Block entries, domains

15 | {% if blockentries %} 16 |
17 | {% for blockentry in blockentries %} 18 | {{ blockentry.url }}, 19 | {% endfor %} 20 |
21 | 22 | {% include "rsshistory/pagination.html" %} 23 | {% else %} 24 | No URLs yet 25 | {% endif %} 26 | 27 | {% endblock %} 28 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/blockentrylist_list.html: -------------------------------------------------------------------------------- 1 | {% extends base_generic %} 2 | {% load static %} 3 | {% block content %} 4 | 5 | 16 | 17 |

Block lists

18 | {% if blocklists %} 19 |
20 | {% for blocklist in blocklists %} 21 |
22 | {% if not blocklist.processed %} 23 | [NOT PROCESSED] 24 | {% endif %} 25 | 26 | {% include "rsshistory/icon_external.html" %} 27 | {{blocklist.url}} 28 | 29 | 30 | {% include "rsshistory/icon_update.html" %} 31 | 32 | 33 | 34 | {% include "rsshistory/icon_remove.html" %} 35 | 36 |
37 | {% endfor %} 38 | 39 | {% include "rsshistory/pagination.html" %} 40 |
41 | {% else %} 42 | No URLs yet 43 | {% endif %} 44 | 45 | {% endblock %} 46 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/credentials_detail.html: -------------------------------------------------------------------------------- 1 | {% extends base_generic %} 2 | {% load static %} 3 | {% block content %} 4 | 5 | 13 | 14 |
ID: {{object.id}}
15 | 16 |

{{object.name}}

17 |
Credentials type: {{object.credential_type}}
18 |
UserName: {{object.username}}
19 | 20 | {% if object.owner %} 21 | {% with user=object.owner %} 22 |
OwnerID: {{user.id}}
23 |
Owner Name: {{user.username}}
24 | {% endwith %} 25 | {% endif %} 26 | 27 | {% endblock %} 28 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/credentials_list.html: -------------------------------------------------------------------------------- 1 | {% extends base_generic %} 2 | {% load static %} 3 | {% block content %} 4 | 5 | 10 | 11 |

Credentials

12 | {% if content_list %} 13 |
14 | {% for cred_object in content_list %} 15 | 31 | {% endfor %} 32 | {% else %} 33 | No credentials yet 34 | {% endif %} 35 | 36 | {% endblock %} 37 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/data_errors.html: -------------------------------------------------------------------------------- 1 | {% extends base_generic %} 2 | {% block content %} 3 | 4 |

Incorrect youtube links

5 | We should store links in a coherent manner. 6 | 7 |
    8 | {% for link in incorrect_youtube_links %} 9 |
  • 10 | {{link.title}} 11 | Details 12 | Fix link 13 | Remove entry 14 |
  • 15 | {% endfor %} 16 |
17 | 18 |

Link incorrect language

19 | 20 | {% for link in links_with_incorrect_language %} 21 | 25 | {% endfor %} 26 | 27 |

Tags errors

28 | 29 |
    30 | {% for tag in tags_for_missing_links %} 31 |
  • {{tag.tag}} {{tag.link}} {{tag.author}} {{tag.date}} 32 | Fix 33 | Remove 34 |
  • 35 | {% endfor %} 36 |
37 | 38 | {% endblock %} 39 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/dataexport_list.html: -------------------------------------------------------------------------------- 1 | {% extends base_generic %} 2 | {% load static %} 3 | {% block content %} 4 | 5 | 10 | 11 |

Exports

12 | {% if content_list %} 13 |
14 | {% for export_object in content_list %} 15 | 42 | {% endfor %} 43 | {% else %} 44 | No exports yet 45 | {% endif %} 46 | 47 | {% endblock %} 48 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/domain_category_list.html: -------------------------------------------------------------------------------- 1 | {% extends base_generic %} 2 | {% load static %} 3 | {% block content %} 4 | 5 |
6 | {% if user.is_staff %} 7 | Reset dynamic data 8 | {% endif %} 9 |
10 | 11 |

Domain category list

12 | 13 | {% for category in category_list %} 14 |
{{category.category}} Link
15 | 16 |
-- 17 | {% for subcategory in category.subcategories.all %} 18 | {{subcategory.subcategory}} Link 19 | {% endfor %} 20 |
21 | 22 |
23 | {% endfor %} 24 | 25 | {% include "rsshistory/domain_help_element.html" %} 26 | 27 | {% endblock %} 28 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/domain_help_element.html: -------------------------------------------------------------------------------- 1 |

Domain help

2 | 7 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/domains_list.html: -------------------------------------------------------------------------------- 1 | {% extends base_generic %} 2 | {% load static %} 3 | {% block content %} 4 | 5 | {% include "rsshistory/form_filter_element.html" %} 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 15 | 16 | 19 | 20 | {% endblock %} 21 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/domains_list__script.js: -------------------------------------------------------------------------------- 1 | {% load static %} 2 | 3 | function getDomainTemplate(domain, show_icons = true, small_icons = false) { 4 | var id = domain.id; 5 | var domain_only = domain.domain; 6 | var main = domain.main; 7 | var subdomain = domain.subdomain; 8 | var suffix = domain.suffix; 9 | var tld = domain.tld; 10 | 11 | var url_absolute = "{% url 'rsshistory:domain-detail' 1007 %}"; 12 | url_absolute = url_absolute.replace("1007", domain.id); 13 | 14 | var title = domain.domain; 15 | 16 | var template = ` 17 | 20 | 21 | ${title} 22 | 23 | 24 | `; 25 | 26 | return template; 27 | } 28 | 29 | function fillDomainList(domains) { 30 | let htmlOutput = ''; 31 | 32 | if (domains && domains.length > 0) { 33 | domains.forEach(domain => { 34 | var template_text = getDomainTemplate(domain, view_show_icons, view_small_icons); 35 | htmlOutput += template_text; 36 | }); 37 | } 38 | 39 | return htmlOutput; 40 | } 41 | 42 | function fillListData() { 43 | let data = object_list_data; 44 | $('#listData').html(""); 45 | 46 | let domains = data.domains; 47 | 48 | if (!domains || domains.length == 0) { 49 | $('#listData').html("No domains found"); 50 | $('#pagination').html(""); 51 | return; 52 | } 53 | 54 | var finished_text = fillDomainList(domains); 55 | $('#listData').html(finished_text); 56 | let pagination = GetPaginationNav(data.page, data.num_pages, 
data.count); 57 | $('#pagination').html(pagination); 58 | } 59 | 60 | {% include "rsshistory/javascript_list_utilities.js" %} 61 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/entries_import_summary.html: -------------------------------------------------------------------------------- 1 | {% extends base_generic %} 2 | {% block content %} 3 | 4 |
5 | {{ summary_text }}
6 | 
7 | 8 | {% endblock %} 9 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/entries_untagged.html: -------------------------------------------------------------------------------- 1 | {% extends base_generic %} 2 | {% block content %} 3 | 4 | {% for link in links %} 5 |
{{link.link }}
6 | {% endfor %} 7 |

8 | 9 | {% endblock %} 10 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/entry__add_simple.html: -------------------------------------------------------------------------------- 1 | {% extends base_generic %} 2 | {% load static %} 3 | {% block content %} 4 | 5 |
6 | {% include "rsshistory/form_multiline_element.html" %} 7 |
8 | 9 |
    10 |
  • Internet is dangerous, so carefully select which links you add
  • 11 |
12 | 13 |
14 |
15 |
16 | 17 | 20 | 21 | {% endblock %} 22 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/entry_add__form.html: -------------------------------------------------------------------------------- 1 | {% include "rsshistory/form_multiline_element.html" %} 2 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/entry_added.html: -------------------------------------------------------------------------------- 1 | {% extends base_generic %} 2 | {% block content %} 3 | 4 | 8 | 9 | {% endblock %} 10 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/entry_detail.html: -------------------------------------------------------------------------------- 1 | {% extends base_generic %} 2 | {% load static %} 3 | {% block content %} 4 | 5 | {% include "rsshistory/entry_detail__frame.html" %} 6 | 7 |
8 | Loading... 9 |
10 | 11 | {% include "rsshistory/entry_detail__tags.html" %} 12 | 13 |
14 |
15 | 16 |
17 |
18 | 19 |
20 |
21 | 22 | {% include "rsshistory/entry_detail__source.html" %} 23 | 24 |
25 | Loading... 26 |
27 | 28 |
29 |
30 | 31 | 34 | 35 | {% include "rsshistory/entry_detail__comments.html" %} 36 | 37 |
38 | Loading... 39 |
40 | 41 |
42 | Loading... 43 |
44 | 45 | 46 | 47 | 50 | 51 |
52 |
53 | 54 | {% endblock %} 55 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/entry_detail__comments.html: -------------------------------------------------------------------------------- 1 | {% load static %} 2 | 3 | {% if object.comments.all %} 4 |

Comments

5 |
6 | {% for comment in object.comments.all %} 7 |
{{comment.get_comment | safe | linebreaks}}
8 |
9 | {{comment.date_published}} {{comment.user_object}} 10 | {% if comment.user_object.username == user.get_username %} 11 | 19 | {% endif %} 20 |
21 |
22 | {% endfor %} 23 |
24 | {% endif %} 25 | 26 | {% if user.is_authenticated and object.is_commentable %} 27 | Add comment 28 | {% endif %} 29 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/entry_detail__dynamic.html: -------------------------------------------------------------------------------- 1 | {% load static %} 2 | 3 | {% if object.comments.all %} 4 |
5 | {% endif %} 6 | {% include "rsshistory/entry_detail__comments.html" %} 7 | 8 | {% include "rsshistory/entry_detail__parameters_operation.html" %} 9 | 10 | {% if transitions %} 11 | {% endif %} 12 | {% include "rsshistory/entry_detail__related.html" %} 13 | 14 |
15 | {% include "rsshistory/search_places_element.html" %} 16 |
17 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/entry_detail__frame.html: -------------------------------------------------------------------------------- 1 | 2 |
3 | {{object_controller.get_frame_html | safe }} 4 |
5 | 6 | 7 |
8 |

14 | 15 |
16 | {{ object_controller.get_title_html }} 17 |
18 |

19 | 20 | 21 | {% include "rsshistory/icon_external.html" %} 22 | {{object.link}} 23 | 24 |
25 |
26 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/entry_detail__source.html: -------------------------------------------------------------------------------- 1 | {% load static %} 2 | 3 | {% if object.source %} 4 | 25 | {% endif %} 26 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/entry_detail__tag_form.html: -------------------------------------------------------------------------------- 1 |
2 | {% csrf_token %} 3 | {{ form.tags }} 4 | 5 | 6 |
7 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/entry_detail__tags.html: -------------------------------------------------------------------------------- 1 | {% load static %} 2 | 3 |
4 | 10 |
11 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/entry_detail__vote_form.html: -------------------------------------------------------------------------------- 1 |
2 | {% csrf_token %} 3 | {{ form.vote }} 4 | 5 | 6 |
7 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/entry_list.html: -------------------------------------------------------------------------------- 1 | {% extends base_generic %} 2 | {% load static %} 3 | {% block content %} 4 | 5 | {% include "rsshistory/entry_list__actions.html" %} 6 | 7 | {% include "rsshistory/form_filter_oneliner_element.html" %} 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 17 | 18 |
19 |
20 | 21 | 22 | 23 | 26 | 27 | {% endblock %} 28 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/entry_list_display_element.html: -------------------------------------------------------------------------------- 1 | 2 | 3 |
4 | {% include "rsshistory/entry_thumbnail_element.html" %} 5 |
{{entry.title}}
6 |
7 |
8 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/entry_thumbnail_element.html: -------------------------------------------------------------------------------- 1 | 2 | {% if user_config.show_icons%} 3 | 16 | {% endif %} 17 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/form_basic.html: -------------------------------------------------------------------------------- 1 | {% extends base_generic %} 2 | {% load static %} 3 | {% block content %} 4 | 5 |

{{form_title}}

6 | 7 |
8 | {{form_description_pre| safe}} 9 |
10 | 11 | {% include "rsshistory/form_multiline_element.html" %} 12 | 13 |
14 | {{form_description_post| safe}} 15 |
16 | 17 | {% endblock %} 18 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/form_multiline_element.html: -------------------------------------------------------------------------------- 1 | {% load static %} 2 | 3 |
8 | {% csrf_token %} 9 | 10 | {% for field in form %} 11 | {% if field.is_hidden %} 12 |
13 | {% else %} 14 |
15 | {% endif %} 16 | 17 |
18 |
19 | {{field }} 20 |
21 | {% if field.help_text %} 22 | 23 | {{field.help_text | safe }} 24 | 25 | {% endif %} 26 |
27 |
28 | {% endfor %} 29 | 30 | 31 | {% if form_submit_button_name %} 32 | 33 | {% else %} 34 | 35 | {% endif %} 36 | 37 | 38 | {% if form.errors.items %} 39 |
    40 | {% for field, errors in form.errors.items %} 41 | {% for error in errors %} 42 |
  • {{ field|title }}: {{ error }}
  • 43 | {% endfor %} 44 | {% endfor %} 45 |
46 | {% endif %} 47 | 48 | {% if form_errors %} 49 |
50 | {% for error in form_errors %} 51 |
{{error}}
52 | {% endfor %} 53 |
54 | {% endif %} 55 | 56 | {% if form_warnings %} 57 |
58 | {% for warning in form_warnings %} 59 |
{{warning}}
60 | {% endfor %} 61 |
62 | {% endif %} 63 | 64 | {% if form_notes %} 65 |
66 | {% for note in form_notes %} 67 |
{{note}}
68 | {% endfor %} 69 |
70 | {% endif %} 71 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/form_oneliner.html: -------------------------------------------------------------------------------- 1 | {% extends base_generic %} 2 | {% load static %} 3 | {% block content %} 4 | 5 | 6 |
7 |

{{form_title}}

8 | 9 |
10 | {{form_description_pre| safe}} 11 |
12 | 13 | {% include "rsshistory/form_oneliner_element.html" %} 14 | 15 |
16 | {{form_description_post| safe}} 17 |
18 |
19 | 20 | {% endblock %} 21 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/form_oneliner_element.html: -------------------------------------------------------------------------------- 1 |
8 | {% csrf_token %} 9 | 10 | {% for field in form %} 11 | 12 | 13 | 14 | {{field }} 15 | 16 | {% endfor %} 17 | 18 | {% if form_submit_button_name %} 19 | 20 | {% else %} 21 | 22 | {% endif %} 23 |
24 | 25 | {% if form.errors.items %} 26 |
    27 | {% for field, errors in form.errors.items %} 28 | {% for error in errors %} 29 |
  • {{ field|title }}: {{ error }}
  • 30 | {% endfor %} 31 | {% endfor %} 32 |
33 | {% endif %} 34 | 35 | {% if form_errors %} 36 |
37 | {% for error in form_errors %} 38 |
{{error}}
39 | {% endfor %} 40 |
41 | {% endif %} 42 | 43 | {% if form_warnings %} 44 |
45 | {% for warning in form_warnings %} 46 |
{{warning}}
47 | {% endfor %} 48 |
49 | {% endif %} 50 | 51 | {% if form_notes %} 52 |
53 | {% for note in form_notes %} 54 |
{{note}}
55 | {% endfor %} 56 |
57 | {% endif %} 58 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/form_search_init.html: -------------------------------------------------------------------------------- 1 | {% extends base_generic %} 2 | {% load static %} 3 | {% block content %} 4 | 5 |
6 | {% include "rsshistory/form_filter_oneliner_element.html" %} 7 | {% endblock %} 8 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/form_search_omni.html: -------------------------------------------------------------------------------- 1 | {% extends base_generic %} 2 | {% load static %} 3 | {% block content %} 4 | 5 | {% include "rsshistory/form_search_omni_element.html" %} 6 | 7 | {% endblock %} 8 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/form_search_omni_element.html: -------------------------------------------------------------------------------- 1 | {% include "rsshistory/form_filter_element.html" %} 2 | 3 |
4 | {% include "rsshistory/form_search_syntax_element.html" %} 5 |
6 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/form_search_syntax_element.html: -------------------------------------------------------------------------------- 1 | {% if entry_query_names and entry_query_operators %} 2 |
3 | Available fields: 4 |
    5 |
  • 6 | {% for entry_query_name in entry_query_names %} 7 | "{{entry_query_name}}", 8 | {% endfor %} 9 |
  • 10 |
11 | 12 | Available comparison operators: 13 |
    14 |
  • 15 | {% for entry_query_operator in entry_query_operators %} 16 | "{{entry_query_operator}}", 17 | {% endfor %} 18 |
  • 19 |
20 | 21 | Notes: 22 |
    23 |
  • "&" - And
  • 24 |
  • "|" - Or
  • 25 |
  • "~" - Not, must appear before bracket
  • 26 |
  • 27 | "===" - iexact 28 |
  • 29 |
  • 30 | "==" for compare this = that 31 |
  • 32 |
  • 33 | "=" to find items containing string (icontains) 34 |
  • 35 |
  • 36 | 37 | {% include "rsshistory/icon_external.html" %} 38 | Standard Django operators are also supported. "title__isnull == True" Is correct search term. 39 |
  • 40 |
  • System accepts brackets
  • 41 |
  • Suggestions respect the case of the input (lower case, upper case)
  • 42 |
43 | 44 | Examples: 45 |
    46 |
  • "china" - searches for china
  • 47 |
  • "china | japan" - searches for china or japan
  • 48 |
  • "title = china" - searches anything with china inside
  • 49 |
  • "field = True" - all rows with 'field' being True
  • 50 |
  • "field__icontains = something" - all rows with 'field' containing 'something'
  • 51 |
  • "(title = china | title = japan) & bookmarked = True" - all bookmarked links with china or japan in the title
  • 52 |
53 |
54 | {% endif %} 55 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/form_source_add_simple.html: -------------------------------------------------------------------------------- 1 | {% extends base_generic %} 2 | {% load static %} 3 | {% block content %} 4 | 5 |
6 | {% include "rsshistory/form_oneliner_element.html" %} 7 | 8 | {% include "rsshistory/source_help_element.html" %} 9 |
10 | 11 | {% endblock %} 12 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/gateways.html: -------------------------------------------------------------------------------- 1 | {% extends base_generic %} 2 | {% load static %} 3 | {% block content %} 4 | 5 |

Gateways

6 | A gateway is a location through which you can find useful things 7 | 8 | {% for Type_Name, items in gateways.items %} 9 | {% if items %} 10 |

{{Type_Name}}

11 | 24 | {% endif %} 25 | {% endfor %} 26 | 27 | {% endblock %} 28 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/go_back.html: -------------------------------------------------------------------------------- 1 | {% extends base_generic %} 2 | {% load static %} 3 | {% block content %} 4 | 5 | {{summary_text}} 6 | 7 | 8 | 9 | {% endblock %} 10 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/icon_add.html: -------------------------------------------------------------------------------- 1 | {% load static %} 2 | 3 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/icon_disable.html: -------------------------------------------------------------------------------- 1 | {% load static %} 2 | 3 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/icon_edit.html: -------------------------------------------------------------------------------- 1 | {% load static %} 2 | 3 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/icon_enable.html: -------------------------------------------------------------------------------- 1 | {% load static %} 2 | 3 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/icon_external.html: -------------------------------------------------------------------------------- 1 | {% load static %} 2 | 3 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/icon_link.html: -------------------------------------------------------------------------------- 1 | {% load static %} 2 | 3 | 4 | -------------------------------------------------------------------------------- 
/rsshistory/templates/rsshistory/icon_remove.html: -------------------------------------------------------------------------------- 1 | {% load static %} 2 | 3 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/icon_source.html: -------------------------------------------------------------------------------- 1 | {% load static %} 2 | 3 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/icon_update.html: -------------------------------------------------------------------------------- 1 | {% load static %} 2 | 3 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/import_internetarchive.html: -------------------------------------------------------------------------------- 1 | {% extends base_generic %} 2 | {% block content %} 3 | 4 |

{{form_title}}

5 | 6 |

{{form_description}}

7 | 8 |

9 | {% csrf_token %} 10 | {{ form }} 11 | 12 |
13 | 14 | {% endblock %} 15 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/index.html: -------------------------------------------------------------------------------- 1 | {% extends base_generic %} 2 | {% load static %} 3 | {% block content %} 4 | 5 | {% include "rsshistory/form_search_omni_element.html" %} 6 | 7 | {% endblock %} 8 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/info_users.html: -------------------------------------------------------------------------------- 1 | 2 |

User management (to be done)

3 |
    4 |
  • New users can be added by an administrator, by moderators, or by users with enough karma
  • 5 |
  • You do not set passwords, you are given a proper one. Write it down.
  • 6 |
7 | 8 | Karma effects on the users 9 |
    10 |
  • If you have karma below 0, your account is banned
  • 11 |
  • After a certain threshold you can submit new links
  • 12 |
  • After a certain threshold you can submit comments
  • 13 |
  • After a certain threshold you can upvote or downvote comments
  • 14 |
  • After a certain threshold you can add users
  • 15 |
16 | 17 | What causes karma to change 18 |
    19 |
  • admin, or moderators
  • 20 |
  • adding vote for a link
  • 21 |
  • upvotes, or downvotes on comments
  • 22 |
  • bans of other users you invited
  • 23 |
24 | 25 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/keywords_list.html: -------------------------------------------------------------------------------- 1 | {% extends base_generic %} 2 | {% load static %} 3 | {% block content %} 4 | 5 | 8 | 9 |

KeyWords

10 |

11 | Last updated {{last_date}} 12 |

13 | 14 | {% if content_list %} 15 | {% for keyword in content_list %} 16 | {{keyword.0}} 17 | {% endfor %} 18 |
19 | 20 | {% else %} 21 | No keywords yet 22 | {% endif %} 23 | 24 |

Details

25 | 26 | Keywords are obtained from every new link title. After 2 days keywords are removed. 27 | 28 | {% endblock %} 29 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/missing_rights.html: -------------------------------------------------------------------------------- 1 | {% extends base_generic %} 2 | {% load static %} 3 | {% block content %} 4 | 5 |
 6 | User does not have rights to perform this action
 7 | 
8 | 9 | {% endblock %} 10 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/modelfiles_list.html: -------------------------------------------------------------------------------- 1 | {% extends base_generic %} 2 | {% load static %} 3 | {% block content %} 4 | 5 |
6 | {% if user.is_staff %} 7 | 8 | 9 | 10 | {% endif %} 11 |
12 | 13 |

{{page_obj.paginator.count}} Files

14 | Size:{{sum_size_megabytes}}MB, {{sum_size_kilobytes}}KB, {{sum_size_bytes}}B 15 | 16 | {% if content_list %} 17 |
18 | {% for modelfile in content_list %} 19 |
20 | {{modelfile.file_name}} 21 | {{modelfile.get_size_bytes}} 22 | 23 |
24 | Model file 25 | 26 | 27 | 28 | 29 |
30 | 31 |
32 | {% endfor %} 33 | {% else %} 34 | No files yet 35 | {% endif %} 36 | 37 | {% endblock %} 38 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/page_show_properties.html: -------------------------------------------------------------------------------- 1 | {% extends base_generic %} 2 | {% load static %} 3 | {% block content %} 4 | 5 |
6 | {% include "rsshistory/form_multiline_element.html" %} 7 |
8 | 9 |
    10 |
  • The Internet is dangerous, so carefully select which links you add
  • 11 |
12 | 13 |
14 |
15 |
16 | 17 | 20 | 21 | {% endblock %} 22 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/pagination.html: -------------------------------------------------------------------------------- 1 | 36 | 37 | 38 | Page {{ page_obj.number }} of {{ page_obj.paginator.num_pages }}. Total:{{page_obj.paginator.object_list.count}}, {{page_obj.paginator.per_page}} per page. 39 | 40 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/readlater_list.html: -------------------------------------------------------------------------------- 1 | {% extends base_generic %} 2 | {% load static %} 3 | {% block content %} 4 | 5 |

Check later list

6 | 7 | 12 | 13 | 14 | 15 | 16 | 17 | Loading... 18 | 19 | 20 | 22 | 23 | 24 | 25 | 28 | 29 | {% endblock %} 30 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/search_engines.html: -------------------------------------------------------------------------------- 1 | {% extends base_generic %} 2 | {% load static %} 3 | {% block content %} 4 | 5 | {% for Type_Name, items in search_engines.items %} 6 | {% if items %} 7 |

{{Type_Name}}

8 | 21 | {% endif %} 22 | {% endfor %} 23 | 24 | {% endblock %} 25 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/search_places_element.html: -------------------------------------------------------------------------------- 1 | {% if search_engines %} 2 | 3 |

4 | 7 |

8 | 9 |
10 | 20 |
21 | 22 | {% endif %} 23 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/searchview_detail.html: -------------------------------------------------------------------------------- 1 | {% extends base_generic %} 2 | {% load static %} 3 | {% block content %} 4 | 5 | {% if user.is_staff %} 6 | 7 | 8 | {% include "rsshistory/icon_edit.html" %} 9 | 10 | 11 | 12 | 13 | 14 | 15 | {% endif %} 16 | 17 |

{{object.name}}

18 | 19 |
ID: {{object.id}}
20 |
Name: {{object.name}}
21 |
Hover text:{{object.hover_text}}
22 |
Icon:{{object.icon}}
23 |
Order By:{{object.order_by}}
24 |
Entry limit:{{object.entry_limit}}
25 |
Auto fetch:{{object.auto_fetch}}
26 |
date published day limit:{{object.date_published_day_limit}}
27 |
date created day limit:{{object.date_created_day_limit}}
28 |
user:{{object.user}}
29 |
conditions: {{object.get_conditions}}
30 | 31 | {% endblock %} 32 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/searchview_list.html: -------------------------------------------------------------------------------- 1 | {% extends base_generic %} 2 | {% load static %} 3 | {% block content %} 4 | 5 | 10 | 11 |

Search views

12 | {% if content_list %} 13 |
14 | {% for search_view in content_list %} 15 | 39 | {% endfor %} 40 | {% else %} 41 | No rules yet 42 | {% endif %} 43 | 44 | {% endblock %} 45 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/source__add_simple.html: -------------------------------------------------------------------------------- 1 | {% extends base_generic %} 2 | {% load static %} 3 | {% block content %} 4 | 5 |
6 | {% include "rsshistory/form_multiline_element.html" %} 7 |
8 | 9 |
    10 |
  • The Internet is dangerous, so carefully select which links you add
  • 11 |
12 | 13 |
14 |
15 |
16 | 17 | 20 | 21 | {% endblock %} 22 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/source_add__form.html: -------------------------------------------------------------------------------- 1 | {% include "rsshistory/form_multiline_element.html" %} 2 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/source_added.html: -------------------------------------------------------------------------------- 1 | {% extends base_generic %} 2 | {% block content %} 3 | 4 | 8 | 9 | {% endblock %} 10 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/source_client_reader.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | RSS Feed Example 7 | 8 | 9 | 10 |
11 | 12 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/source_help_element.html: -------------------------------------------------------------------------------- 1 |

Notes

2 |
    3 |
  • If the provided URL is an HTML page that provides an RSS link, then the system will try to find that link
  • 4 |
  • If you provide a YouTube video link, then the channel's RSS feed will be added instead
  • 5 |
  • If you provide a Reddit link, then the RSS channel will be added instead
  • 6 |
  • The Internet is dangerous, so carefully select which links you add
  • 7 |
8 | 9 |

Resources

10 | 28 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/sources_import_summary.html: -------------------------------------------------------------------------------- 1 | {% extends base_generic %} 2 | {% block content %} 3 | 4 |
5 | {{ summary_text }}
6 | 
7 | 8 | {% endblock %} 9 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/sources_list.html: -------------------------------------------------------------------------------- 1 | {% extends base_generic %} 2 | {% load static %} 3 | {% block content %} 4 | 5 | {% include "rsshistory/sources_list__actions.html" %} 6 | 7 | {% include "rsshistory/form_filter_element.html" %} 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 17 | 18 | 21 | 22 | {% endblock %} 23 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/sources_list__standard.html: -------------------------------------------------------------------------------- 1 | 9 | {% if user_config.show_icons %} 10 | 11 | {% endif %} 12 | 13 | {% if source.dead %} 14 | [DEAD] 15 | {% endif %} 16 | {% if not source.enabled %} 17 | [H] 18 | {% endif %} 19 | {% if source.dynamic_data.consecutive_errors %} 20 | [ERRORS] 21 | {% endif %} 22 | {{source.title}} 23 | 24 | 27 | 30 | 31 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/summary_present.html: -------------------------------------------------------------------------------- 1 | {% extends base_generic %} 2 | {% load static %} 3 | {% block content %} 4 | 5 |
6 |
7 | 8 |
9 | {{ summary_text | safe }} 10 |
11 | 12 | {% endblock %} 13 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/tags_list.html: -------------------------------------------------------------------------------- 1 | {% extends base_generic %} 2 | {% load static %} 3 | {% block content %} 4 | 5 | 27 | 28 |

{{tags_title}}

29 | 30 |
31 | {% if content_list %} 32 | {% for tag in content_list %} 33 | 34 | #{{tag.tag}}, 35 | {% endfor %} 36 | {% else %} 37 | No tags 38 | {% endif %} 39 |
40 | 41 | {% endblock %} 42 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/user_config.html: -------------------------------------------------------------------------------- 1 | {% extends base_generic %} 2 | {% load static %} 3 | {% block content %} 4 | 5 | 6 | 7 | API keys 8 | 9 | 10 | Credentials 11 | 12 |

[{{user_object.karma}}] {{ user_object.user.username }}

13 | 14 |
15 | {% csrf_token %} 16 | {% for field in config_form %} 17 |
18 | 19 |
20 |
21 | {{field }} 22 |
23 | 24 | {{field.help_text | safe }} 25 | 26 |
27 |
28 | {% endfor %} 29 | 30 | 31 |
32 | 33 | {% with object=user_object.user %} 34 |
User ID:{{object.id}}
35 |
User name:{{object.username}}
36 |
e-mail:{{object.email}}
37 |
Staff:{{object.is_staff}}
38 |
Active:{{object.is_active}}
39 |
Last login:{{object.last_login}}
40 |
Date joined:{{object.date_joined}}
41 | {% endwith %} 42 | 43 | {% endblock %} 44 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/user_configs.html: -------------------------------------------------------------------------------- 1 | {% extends base_generic %} 2 | {% load static %} 3 | {% block content %} 4 | 5 | {% for user_config in user_objects %} 6 |

{{user_config.user.username}}

7 | 8 | Karma: {{user_config.karma}} 9 | Birth date:{{user_config.birth_date}} 10 | 11 | UserConfig ID:{{user_config.id}} 12 | User ID:{{user_config.user.id}} 13 | 14 | User name:{{user_config.user.username}} 15 | Password:password 16 | e-mail:{{user_config.user.email}} 17 | Staff:{{user_config.user.is_staff}} 18 | Active:{{user_config.user.is_active}} 19 | Last login:{{user_config.user.last_login}} 20 | Date joined:{{user_config.user.date_joined}} 21 | 22 | 26 | {% endfor %} 27 | 28 | {% endblock %} 29 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/user_personal.html: -------------------------------------------------------------------------------- 1 | {% extends base_generic %} 2 | {% load static %} 3 | {% block content %} 4 | 5 |
6 | {% if config.track_user_navigation %} 7 | 8 | 9 | Browse History 10 | {% endif %} 11 | 12 | 13 | Comments 14 | {% if config.track_user_searches %} 15 | 16 | 17 | Search history 18 | {% endif %} 19 | 20 |
21 | 22 | {% endblock %} 23 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/userbrowsehistory_list.html: -------------------------------------------------------------------------------- 1 | {% extends base_generic %} 2 | {% load static %} 3 | {% block content %} 4 | 5 |

Browse History

6 | 7 | 8 | 9 | 10 | 11 | Loading... 12 | 13 | 14 | 16 | 17 | 18 | 19 | 22 | 23 | {% endblock %} 24 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/userbrowsehistory_list__script.js: -------------------------------------------------------------------------------- 1 | {% load static %} 2 | 3 | 4 | function fillQueueList(queue) { 5 | let htmlOutput = ''; 6 | 7 | if (queue && queue.length > 0) { 8 | queue.forEach(entry => { 9 | htmlOutput += getEntryVisitsBar(entry); 10 | }); 11 | } 12 | 13 | return htmlOutput; 14 | } 15 | 16 | 17 | function fillListData() { 18 | let data = object_list_data; 19 | $('#listData').html(""); 20 | 21 | let queue = data.queue; 22 | 23 | if (!queue || queue.length == 0) { 24 | $('#listData').html("Queue is empty"); 25 | $('#pagination').html(""); 26 | return; 27 | } 28 | 29 | var finished_text = fillQueueList(queue); 30 | $('#listData').html(finished_text); 31 | let pagination = GetPaginationNav(data.page, data.num_pages, data.count); 32 | $('#pagination').html(pagination); 33 | } 34 | 35 | 36 | $(document).on("click", '#clear-list', function(e) { 37 | e.preventDefault(); 38 | 39 | $('#clear-list').prop("disabled", true); 40 | $('.remove-button').prop("disabled", true); 41 | sendClearList(); 42 | }); 43 | 44 | 45 | $(document).on("click", '.remove-button', function(e) { 46 | e.preventDefault(); 47 | 48 | $('#clear-list').prop("disabled", true); 49 | $('.remove-button').prop("disabled", true); 50 | 51 | const buttonId = $(this).attr('id'); 52 | sendRemoveListItem(buttonId); 53 | }); 54 | 55 | {% include "rsshistory/javascript_list_utilities.js" %} 56 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/usercommentscontroller_list.html: -------------------------------------------------------------------------------- 1 | {% extends base_generic %} 2 | {% load static %} 3 | {% block content %} 4 | 5 |

Comment History

6 | 7 | {% if content_list %} 8 |
9 | {% for comment in content_list %} 10 | 30 | {% endfor %} 31 | {% include "rsshistory/pagination.html" %} 32 | {% else %} 33 | No history yet 34 | {% endif %} 35 | 36 | {% endblock %} 37 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/usersearchhistory_list.html: -------------------------------------------------------------------------------- 1 | {% extends base_generic %} 2 | {% load static %} 3 | {% block content %} 4 | 5 |

Search History

6 | 7 | {% if content_list %} 8 | 25 | {% include "rsshistory/pagination.html" %} 26 | 27 | {% else %} 28 | No history yet 29 | {% endif %} 30 | 31 | {% endblock %} 32 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/usertags_list.html: -------------------------------------------------------------------------------- 1 | {% extends base_generic %} 2 | {% load static %} 3 | {% block content %} 4 | 5 | 30 | 31 |

{{tags_title}}

32 | 33 |
34 | {% if content_list %} 35 | {% for tag in content_list %} 36 | 37 | #{{tag.tag}}, 38 | {% endfor %} 39 | {% else %} 40 | No tags 41 | {% endif %} 42 |
43 | 44 | {% endblock %} 45 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/wizard_setup_init.html: -------------------------------------------------------------------------------- 1 | {% extends base_generic %} 2 | {% load static %} 3 | {% block content %} 4 | 5 |
    6 |
  • 7 | 8 |
    Setup best suited for RSS clients, news.
    9 |
  • 10 | 11 |
  • 12 | 13 |
    Setup best suited for YouTube-like players, image galleries.
    14 |
  • 15 | 16 |
  • 17 | 18 |
    Setup best suited for Google-like search engines.
    19 |
  • 20 |
21 | 22 |
23 |
24 | 25 | 28 | 29 | {% endblock %} 30 | -------------------------------------------------------------------------------- /rsshistory/templates/rsshistory/wizard_setup_init.js: -------------------------------------------------------------------------------- 1 | {% load static %} 2 | 3 | function getSpinnerContainer(text = '') { 4 | return ` ${text}`; 5 | } 6 | 7 | function getSuccessIcon() { 8 | return "✔"; 9 | } 10 | 11 | function getErrorIcon() { 12 | return "❌"; 13 | } 14 | 15 | 16 | function setupFor(url, button_element, button_text) { 17 | $(button_element).prop("disabled", true); 18 | 19 | $(button_element).html( 20 | ` Loading...` 21 | ); 22 | 23 | let spinner_container = getSpinnerContainer(); 24 | let success_icon = getSuccessIcon(); 25 | let error_icon = getErrorIcon(); 26 | 27 | $("#setupSpace").append(`

${spinner_container} Configuring...

`); 28 | 29 | getDynamicJson(url, function (data) { 30 | let source_link = "{% url 'rsshistory:sources' %}"; 31 | 32 | if (data.status) { 33 | $("#buttonsSpace").hide(); 34 | $("#config-line").html(`${success_icon} Configuring... OK`); 35 | 36 | $("#setupSpace").append(` 37 |

You can start by adding sources Sources

38 |

You can enable list blocks filter domains using easy list, etc.

`); 39 | } 40 | else { 41 | $("#config-line").html(`${error_icon} Configuring... ERROR`); 42 | } 43 | $(button_element).prop("disabled", false).html(button_text); 44 | }); 45 | } 46 | 47 | 48 | $("#btnFetchNews").click(function() { 49 | setupFor("{% url 'rsshistory:json-wizard-setup-news' %}", "#btnFetchNews", "Setup News Reader"); 50 | }); 51 | $("#btnFetchGallery").click(function() { 52 | setupFor("{% url 'rsshistory:json-wizard-setup-gallery' %}", "#btnFetchGallery", "Setup Gallery Viewer"); 53 | }); 54 | $("#btnFetchSearchEngine").click(function() { 55 | setupFor("{% url 'rsshistory:json-wizard-setup-search-engine' %}", "#btnFetchSearchEngine", "Setup Simple Search Engine"); 56 | }); 57 | -------------------------------------------------------------------------------- /rsshistory/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/rsshistory/tests/__init__.py -------------------------------------------------------------------------------- /rsshistory/tests/fake/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/rsshistory/tests/fake/__init__.py -------------------------------------------------------------------------------- /rsshistory/tests/fake/returndislike.py: -------------------------------------------------------------------------------- 1 | return_dislike_json = """ 2 | {"id":"sPyAQQklc1s","dateCreated":"2022-04-09T21:07:42.773025Z","likes":195413,"rawDislikes":33,"rawLikes":373,"dislikes":15788,"rating":4.700986264269582,"viewCount":34345508,"deleted":false} 3 | """ 4 | -------------------------------------------------------------------------------- /rsshistory/tests/fake/robotstxtcom.py: 
-------------------------------------------------------------------------------- 1 | robots_txt_example_com_robots = """ 2 | User-agent: * 3 | Disallow: /admin/ 4 | """ 5 | -------------------------------------------------------------------------------- /rsshistory/tests/test_dateutils.py: -------------------------------------------------------------------------------- 1 | from datetime import timedelta 2 | 3 | from utils.dateutils import DateUtils 4 | 5 | from .fakeinternet import FakeInternetTestCase 6 | 7 | 8 | class DateUtilsTest(FakeInternetTestCase): 9 | def setUp(self): 10 | self.disable_web_pages() 11 | 12 | def test_get_day_diff(self): 13 | input_time = DateUtils.get_datetime_now_utc() - timedelta(days=17) 14 | days = DateUtils.get_day_diff(input_time) 15 | 16 | self.assertTrue(days == 17) 17 | 18 | input_time = DateUtils.get_datetime_now_utc() - timedelta(days=19) 19 | days = DateUtils.get_day_diff(input_time) 20 | 21 | self.assertTrue(days == 19) 22 | -------------------------------------------------------------------------------- /rsshistory/tests/test_entrypreviewbuilder.py: -------------------------------------------------------------------------------- 1 | from ..viewspkg.plugins import EntryPreviewBuilder 2 | from ..viewspkg.plugins import EntryYouTubePlugin 3 | from ..viewspkg.plugins import EntryOdyseePlugin 4 | from ..viewspkg.plugins import EntryGenericPlugin 5 | 6 | from ..controllers import ( 7 | LinkDataController, 8 | ) 9 | 10 | from .fakeinternet import FakeInternetTestCase 11 | 12 | 13 | class EntryUrlInterfaceTest(FakeInternetTestCase): 14 | def setUp(self): 15 | self.disable_web_pages() 16 | 17 | ob = LinkDataController.objects.create( 18 | source_url="https://www.youtube.com", 19 | link="https://www.youtube.com/watch?v=123223", 20 | title="The second link", 21 | bookmarked=False, 22 | language="en", 23 | ) 24 | 25 | ob = LinkDataController.objects.create( 26 | source_url="https://odysee.com", 27 | 
link="https://odysee.com/@samtime:1/apple-reacts-to-leaked-windows-12:1?test", 28 | title="The second link", 29 | bookmarked=False, 30 | language="en", 31 | ) 32 | 33 | ob = LinkDataController.objects.create( 34 | source_url="https://odysee.com", 35 | link="https://odysee.com/@samtime:1", 36 | title="The second link", 37 | bookmarked=False, 38 | language="en", 39 | ) 40 | 41 | def test_video_youtube_handler(self): 42 | entries = LinkDataController.objects.filter( 43 | link__icontains="https://www.youtube.com/watch?" 44 | ) 45 | entry = entries[0] 46 | 47 | h = EntryPreviewBuilder.get(entry) 48 | self.assertTrue(type(h) is EntryYouTubePlugin) 49 | 50 | def test_video_odysee_handler(self): 51 | entries = LinkDataController.objects.filter( 52 | link="https://odysee.com/@samtime:1/apple-reacts-to-leaked-windows-12:1?test" 53 | ) 54 | entry = entries[0] 55 | 56 | h = EntryPreviewBuilder.get(entry) 57 | self.assertTrue(type(h) is EntryOdyseePlugin) 58 | -------------------------------------------------------------------------------- /rsshistory/tests/test_gateway.py: -------------------------------------------------------------------------------- 1 | from datetime import timedelta 2 | 3 | from ..models import Gateway 4 | from ..configuration import Configuration 5 | from .fakeinternet import FakeInternetTestCase 6 | 7 | 8 | class GatewayTest(FakeInternetTestCase): 9 | def setUp(self): 10 | self.disable_web_pages() 11 | 12 | def test_populate(self): 13 | # call tested function 14 | Gateway.populate() 15 | 16 | gateways = Gateway.objects.all() 17 | self.assertTrue(gateways.count() > 0) 18 | 19 | def test_cleanup__not(self): 20 | Gateway.objects.all().delete() 21 | 22 | # call tested function 23 | Gateway.cleanup() 24 | 25 | gateways = Gateway.objects.all() 26 | self.assertEqual(gateways.count(), 0) 27 | 28 | def test_cleanup__true(self): 29 | Gateway.objects.all().delete() 30 | 31 | cfg = {} 32 | cfg["full"] = True 33 | 34 | # call tested function 35 | Gateway.cleanup(cfg) 
36 | 37 | gateways = Gateway.objects.all() 38 | self.assertTrue(gateways.count() > 0) 39 | -------------------------------------------------------------------------------- /rsshistory/tests/test_modelfiles.py: -------------------------------------------------------------------------------- 1 | from ..models import ModelFiles 2 | from ..configuration import Configuration 3 | from .fakeinternet import FakeInternetTestCase 4 | 5 | 6 | class ModelFilesTest(FakeInternetTestCase): 7 | def setUp(self): 8 | self.disable_web_pages() 9 | 10 | config_entry = Configuration.get_object().config_entry 11 | config_entry.enable_file_support = True 12 | config_entry.save() 13 | 14 | Configuration.get_object().config_entry = config_entry 15 | 16 | def test_add(self): 17 | binary_data = "something".encode() 18 | 19 | ModelFiles.add("https://google.com", binary_data) 20 | 21 | self.assertEqual(ModelFiles.objects.all().count(), 1) 22 | -------------------------------------------------------------------------------- /rsshistory/tests/test_pagedisplay.py: -------------------------------------------------------------------------------- 1 | from sqlalchemy import ( 2 | create_engine, 3 | ) 4 | from pathlib import Path 5 | 6 | from webtoolkit import ( 7 | RssPage, 8 | HtmlPage, 9 | UrlAgeModerator, 10 | ) 11 | from utils.sqlmodel import SqlModel, EntriesTable, SourcesTable 12 | from utils.serializers import PageDisplay 13 | 14 | from ..webtools import ( 15 | YouTubeVideoHandler, 16 | ) 17 | 18 | from .fakeinternet import FakeInternetTestCase, MockRequestCounter 19 | 20 | 21 | class PageDisplayTest(FakeInternetTestCase): 22 | def setUp(self): 23 | self.disable_web_pages() 24 | 25 | def test_html_url(self): 26 | MockRequestCounter.mock_page_requests = 0 27 | 28 | page_display = PageDisplay("https://linkedin.com") 29 | 30 | # one for page, one for rss 31 | self.assertEqual(MockRequestCounter.mock_page_requests, 1) 32 | 33 | def test_youtube(self): 34 | MockRequestCounter.mock_page_requests = 0 
35 | 36 | page_display = PageDisplay( 37 | "https://www.youtube.com/feeds/videos.xml?channel_id=SAMTIMESAMTIMESAMTIMESAM" 38 | ) 39 | 40 | # one for page, one for rss 41 | self.assertEqual(MockRequestCounter.mock_page_requests, 1) 42 | 43 | def test_rss(self): 44 | MockRequestCounter.mock_page_requests = 0 45 | 46 | page_display = PageDisplay("https://rsspage.com/rss.xml") 47 | 48 | # one for page, one for rss 49 | self.assertEqual(MockRequestCounter.mock_page_requests, 1) 50 | -------------------------------------------------------------------------------- /rsshistory/tests/test_readmarkers.py: -------------------------------------------------------------------------------- 1 | from datetime import timedelta 2 | from django.contrib.auth.models import User 3 | 4 | from utils.dateutils import DateUtils 5 | 6 | from ..controllers import SourceDataController, LinkDataController 7 | from ..models import ReadMarkers 8 | from ..configuration import Configuration 9 | 10 | from .fakeinternet import FakeInternetTestCase 11 | 12 | 13 | class ReadMarkersTest(FakeInternetTestCase): 14 | def setUp(self): 15 | self.disable_web_pages() 16 | self.setup_configuration() 17 | 18 | self.user = User.objects.create_user( 19 | username="TestUser", password="testpassword", is_staff=True 20 | ) 21 | 22 | def test_set__general_marker(self): 23 | ReadMarkers.objects.all().delete() 24 | 25 | # call tested function 26 | ReadMarkers.set(self.user) 27 | 28 | self.assertEqual(ReadMarkers.objects.all().count(), 1) 29 | 30 | def test_set__source_marker(self): 31 | ReadMarkers.objects.all().delete() 32 | 33 | source_youtube = SourceDataController.objects.create( 34 | url="https://youtube.com", 35 | title="YouTube", 36 | export_to_cms=True, 37 | ) 38 | 39 | # call tested function 40 | ReadMarkers.set(self.user, source_youtube) 41 | 42 | self.assertEqual(ReadMarkers.objects.all().count(), 1) 43 | -------------------------------------------------------------------------------- 
# /rsshistory/tests/test_searchengines.py:
# --------------------------------------------------------------------------------
from ..controllers import SearchEngines, SearchEngineGoogle, SearchEngineGoogleCache

from .fakeinternet import FakeInternetTestCase, MockRequestCounter


class SearchEnginesTest(FakeInternetTestCase):
    """Checks the search strings built by the search engine controllers."""

    def setUp(self):
        self.disable_web_pages()

    def test_search_engine_google(self):
        """A plain query is appended to the Google search URL."""
        s = SearchEngineGoogle("test")

        self.assertEqual(s.get_search_string(), "https://google.com/search?q=test")

    def test_search_engine_google_cache(self):
        """The cached-page lookup embeds the (partly quoted) page URL."""
        url = "https://lifehacker.com/how-to-access"

        s = SearchEngineGoogleCache(url)

        self.assertEqual(
            s.get_search_string(),
            "http://webcache.googleusercontent.com/search?q=cache:https%3A//lifehacker.com/how-to-access",
        )


# --------------------------------------------------------------------------------
# /rsshistory/tests/test_service_internetarchive.py:
# --------------------------------------------------------------------------------
from utils.services import InternetArchive
from datetime import datetime

from .fakeinternet import FakeInternetTestCase


class InternetArchiveTest(FakeInternetTestCase):
    """Checks the archive URL produced by ``InternetArchive``."""

    def setUp(self):
        self.disable_web_pages()

    def test_translate(self):
        """``get_archive_url`` builds a web.archive.org URL for the given day."""
        p = InternetArchive("https://www-youtube.com/test?parameter=True")

        date_str = "2024-05-12"
        date_input = datetime.strptime(date_str, "%Y-%m-%d")

        # call tested function
        url = p.get_archive_url(date_input)

        self.assertEqual(
            url,
            "https://web.archive.org/web/20240512110000*/https://www-youtube.com/test?parameter=True",
        )


# --------------------------------------------------------------------------------
# /rsshistory/tests/test_service_translate.py:
# --------------------------------------------------------------------------------
from utils.services import GoogleTranslate
from .fakeinternet import FakeInternetTestCase


class GoogleTranslateTest(FakeInternetTestCase):
    """Checks the translate URL produced by ``GoogleTranslate``."""

    def setUp(self):
        self.disable_web_pages()

    def test_translate(self):
        """The original query string is kept after the translate parameters."""
        p = GoogleTranslate("https://www-youtube.com/test?parameter=True")

        # call tested function
        url = p.get_translate_url()

        # The original "parameter=True" query item is joined with "&";
        # a mis-encoded paragraph sign had replaced "&para" in this literal.
        self.assertEqual(
            url,
            "https://www--youtube-com.translate.goog/test?_x_tr_sl=auto&_x_tr_tl=en&_x_tr_hl=en&_x_tr_pto=wapp&parameter=True",
        )


# --------------------------------------------------------------------------------
# /rsshistory/tests/test_views_apikeys.py:
# --------------------------------------------------------------------------------
from django.urls import reverse
from django.contrib.auth.models import User

from utils.dateutils import DateUtils

from ..apps import LinkDatabase
from ..models import ApiKeys

from .fakeinternet import FakeInternetTestCase, MockRequestCounter


class ApiKeysViewsTest(FakeInternetTestCase):
    """View tests for adding, removing and listing API keys."""

    def setUp(self):
        self.disable_web_pages()

        self.user = User.objects.create_user(
            username="testuser",
            password="testpassword",
            is_staff=True,
        )
        self.client.login(username="testuser", password="testpassword")

    def test_api_key_add(self):
        """GET on the add view answers with 200."""
        url = reverse("{}:api-key-add".format(LinkDatabase.name))

        # call tested function
        response = self.client.get(url)

        self.assertEqual(response.status_code, 200)

    def test_api_key_remove(self):
        """GET on the remove view redirects and deletes the key."""
        key = ApiKeys.objects.create(key="test")

        url = reverse("{}:api-key-remove".format(LinkDatabase.name), args=[key.id])

        # call tested function
        response = self.client.get(url)

        self.assertEqual(response.status_code, 302)

        objects = ApiKeys.objects.all()
        self.assertFalse(objects.exists())

    def test_api_keys(self):
        """GET on the list view answers with 200 when a key exists."""
        ApiKeys.objects.create(key="test")

        url = reverse("{}:api-keys".format(LinkDatabase.name))

        # call tested function
        response = self.client.get(url)

        self.assertEqual(response.status_code, 200)


# --------------------------------------------------------------------------------
# /rsshistory/tests/test_views_applogging.py:
# --------------------------------------------------------------------------------
from datetime import datetime
from pytz import timezone
import logging
from django.urls import reverse
from django.contrib.auth.models import User

from utils.dateutils import DateUtils

from ..apps import LinkDatabase
from ..models import KeyWords, AppLogging

from .fakeinternet import FakeInternetTestCase, MockRequestCounter


class AppLoggingViewsTest(FakeInternetTestCase):
    """View tests for the JSON log endpoint."""

    def setUp(self):
        self.disable_web_pages()

        self.user = User.objects.create_user(
            username="testuser",
            password="testpassword",
            is_staff=True,
        )
        self.client.login(username="testuser", password="testpassword")

    def test_json_logs(self):
        """The JSON log view answers with 200 when one INFO record exists."""
        AppLogging.objects.create(
            info_text="text",
            level=int(logging.INFO),
            date=datetime.now(timezone("UTC")),
        )

        url = reverse("{}:json-logs".format(LinkDatabase.name))

        # call tested function
        response = self.client.get(url)

        # the endpoint answers directly, no redirect is expected
        self.assertEqual(response.status_code, 200)

    def test_json_logs__info(self):
        """Same request with the ``infos=1`` query argument also answers 200."""
        AppLogging.objects.create(
            info_text="text",
            level=int(logging.INFO),
            date=datetime.now(timezone("UTC")),
        )

        url = reverse("{}:json-logs".format(LinkDatabase.name))
        url = url + "?infos=1"

        # call tested function
        response = self.client.get(url)

        # the endpoint answers directly, no redirect is expected
        self.assertEqual(response.status_code, 200)


# --------------------------------------------------------------------------------
# /rsshistory/tests/test_views_credentials.py:
# --------------------------------------------------------------------------------
from django.urls import reverse
from django.contrib.auth.models import User

from utils.dateutils import DateUtils

from ..apps import LinkDatabase
from ..models import Credentials

from .fakeinternet import FakeInternetTestCase, MockRequestCounter


class CredentialsViewsTest(FakeInternetTestCase):
    """View tests for adding, editing, removing and listing credentials."""

    def setUp(self):
        self.disable_web_pages()

        login_data = {"username": "testuser", "password": "testpassword"}
        self.user = User.objects.create_user(is_staff=True, **login_data)
        self.client.login(**login_data)

    def _fetch(self, view_name, *url_args):
        """GET the named view of this application and return the response."""
        view = "{}:{}".format(LinkDatabase.name, view_name)
        return self.client.get(reverse(view, args=list(url_args)))

    def test_credential_add(self):
        """The add form is served with 200."""
        # call tested function
        response = self._fetch("credential-add")

        self.assertEqual(response.status_code, 200)

    def test_credential_edit(self):
        """The edit form of an existing record is served with 200."""
        credential = Credentials.objects.create(name="test")

        # call tested function
        response = self._fetch("credential-edit", credential.id)

        self.assertEqual(response.status_code, 200)

    def test_credential_remove(self):
        """Removing a record redirects and leaves the table empty."""
        credential = Credentials.objects.create(name="test")

        # call tested function
        response = self._fetch("credential-remove", credential.id)

        self.assertEqual(response.status_code, 302)
        self.assertFalse(Credentials.objects.all().exists())

    def test_credentials(self):
        """The list view is served with 200 when one record exists."""
        credential = Credentials.objects.create(name="test")

        # call tested function
        response = self._fetch("credentials")

        self.assertEqual(response.status_code, 200)

# --------------------------------------------------------------------------------
# /rsshistory/tests/test_views_domains.py:
# --------------------------------------------------------------------------------
from django.urls import reverse
from django.contrib.auth.models import User

from utils.dateutils import DateUtils

from ..apps import LinkDatabase
from ..controllers import (
    LinkDataController,
    DomainsController,
    BackgroundJobController,
)

from .fakeinternet import FakeInternetTestCase


class DomainsViewsTest(FakeInternetTestCase):
    """View tests for the domain list page and its JSON counterpart."""

    def setUp(self):
        self.disable_web_pages()

        self.user = User.objects.create_user(
            username="testuser", password="testpassword", is_staff=True
        )

    def _get_with_linkedin_data(self, view_name):
        """Create one entry and one domain, log in, then GET the named view."""
        linkedin = "https://linkedin.com"

        LinkDataController.objects.create(
            source_url=linkedin,
            link=linkedin,
            title="The first link",
            description="the first link description",
            language="en",
        )
        DomainsController.objects.create(domain=linkedin)

        self.client.login(username="testuser", password="testpassword")

        target = reverse("{}:{}".format(LinkDatabase.name, view_name))
        return self.client.get(target)

    def test_domains(self):
        """The HTML list answers with 200."""
        response = self._get_with_linkedin_data("domains")

        self.assertEqual(response.status_code, 200)

    def test_domains_json(self):
        """The JSON list answers with 200."""
        response = self._get_with_linkedin_data("domains-json")

        self.assertEqual(response.status_code, 200)


# --------------------------------------------------------------------------------
# /rsshistory/tests/test_views_readmarkers.py:
# --------------------------------------------------------------------------------
from django.urls import reverse
from django.contrib.auth.models import User

from ..apps import LinkDatabase
from ..models import ReadMarkers
from ..configuration import Configuration
from ..controllers import SourceDataController

from .fakeinternet import FakeInternetTestCase


class ReadMarkerTests(FakeInternetTestCase):
    """View tests for setting the general and the per-source read marker."""

    def setUp(self):
        self.disable_web_pages()
        c = Configuration.get_object().config_entry
        c.enable_file_support = True
        c.save()

        self.user = User.objects.create_user(
            username="testuser", password="testpassword", is_staff=True
        )

    def test_set_read_marker(self):
        """Setting the general marker answers with a redirect."""
        ReadMarkers.objects.all().delete()

        self.client.login(username="testuser", password="testpassword")

        url = reverse("{}:set-read-marker".format(LinkDatabase.name))

        # call user action
        response = self.client.get(url)

        # the view answers with a redirect
        self.assertEqual(response.status_code, 302)

    def test_set_source_read_marker(self):
        """Setting the marker of one source answers with a redirect."""
        source = SourceDataController.objects.create(
            url="https://youtube.com",
            title="YouTube",
            export_to_cms=True,
        )

        ReadMarkers.objects.all().delete()

        self.client.login(username="testuser", password="testpassword")

        url = reverse(
            "{}:set-source-read-marker".format(LinkDatabase.name), args=[source.id]
        )

        # call user action
        response = self.client.get(url)

        # the view answers with a redirect
        self.assertEqual(response.status_code, 302)


# --------------------------------------------------------------------------------
# /rsshistory/tests/test_views_searchviews.py:
# --------------------------------------------------------------------------------
from django.urls import reverse
from django.contrib.auth.models import User

from utils.dateutils import DateUtils

from ..apps import LinkDatabase
from ..models import SearchView

from .fakeinternet import FakeInternetTestCase


class SearchViewTests(FakeInternetTestCase):
    """View tests for the search-view add and edit pages."""

    def setUp(self):
        self.disable_web_pages()

        self.user = User.objects.create_user(
            username="testuser", password="testpassword", is_staff=True
        )

    def test_searchview_add__show_form(self):
        """GET on the add view shows the form (200)."""
        self.client.login(username="testuser", password="testpassword")

        url = reverse("{}:searchview-add".format(LinkDatabase.name), args=[])

        # call user action
        response = self.client.get(url)

        # the form page is rendered, no redirect
        self.assertEqual(response.status_code, 200)

    def test_entry_rule_edit__post(self):
        """POST on the search-view edit page answers with a redirect."""
        # NOTE(review): the test name and the posted field names look copied
        # from the entry-rule tests - confirm they match the SearchView form.
        searchview = SearchView.objects.create()

        self.client.login(username="testuser", password="testpassword")

        url = reverse(
            "{}:searchview-edit".format(LinkDatabase.name), args=[searchview.id]
        )

        form_data = {
            "enabled": False,
            "rule_name": "test_rule_edited",
            "trigger_text": "",
            "trigger_text_hits": 1,
            "trigger_text_fields": "",
        }

        # call user action
        response = self.client.post(url, data=form_data)

        self.assertEqual(response.status_code, 302)


# --------------------------------------------------------------------------------
# /rsshistory/tests/test_views_social.py:
# --------------------------------------------------------------------------------
from django.urls import reverse
from django.contrib.auth.models import User

from utils.dateutils import DateUtils

from ..apps import LinkDatabase
from ..controllers import (
    SourceDataController,
    LinkDataController,
    DomainsController,
    BackgroundJobController,
)
from ..models import (
    KeyWords,
    DataExport,
    SourceCategories,
    SourceSubCategories,
    SourceOperationalData,
    Browser,
)

from .fakeinternet import FakeInternetTestCase, MockRequestCounter


class SocialDataViewsTest(FakeInternetTestCase):
    """View tests for the social-data edit page of an entry."""

    def setUp(self):
        self.disable_web_pages()

        self.user = User.objects.create_user(
            username="testuser", password="testpassword"
        )
        self.user.is_staff = True
        self.user.save()

    def test_edit_form(self):
        """GET on the edit view of an existing entry answers with 200."""
        SourceDataController.objects.all().delete()

        self.client.login(username="testuser", password="testpassword")

        entry = LinkDataController.objects.create(
            link="https://linkedin.com",
            title="The first link",
            description="the first link description",
            source=None,
            bookmarked=False,
            permanent=False,
            date_published=DateUtils.get_datetime_now_utc(),
            language="en",
        )

        url = reverse("{}:social-data-edit".format(LinkDatabase.name), args=[entry.id])

        # call user action
        response = self.client.get(url)

        self.assertEqual(response.status_code, 200)


# --------------------------------------------------------------------------------
# /rsshistory/viewspkg/__init__.py:
# --------------------------------------------------------------------------------
"""
Views
"""

# --------------------------------------------------------------------------------
# /rsshistory/viewspkg/plugins/__init__.py:
# --------------------------------------------------------------------------------
"""
Plugins extending views for different services
"""

from .entrygenericplugin import EntryButton, EntryParameter, EntryGenericPlugin
from .entryodyseeplugin import EntryOdyseePlugin
from .entrypreviewbuilder import EntryPreviewBuilder
from .entryyoutubeplugin import EntryYouTubePlugin

# --------------------------------------------------------------------------------
# /rsshistory/viewspkg/plugins/entryodyseeplugin.py:
# --------------------------------------------------------------------------------
from django.urls import reverse
from django.templatetags.static import static

from ...webtools import OdyseeVideoHandler

from ...apps import LinkDatabase
from ...models import ConfigurationEntry

from .entrygenericplugin import EntryGenericPlugin, EntryButton, EntryParameter


class EntryOdyseePlugin(EntryGenericPlugin):
    """
    Entry preview plugin for Odysee video links.

    Menus and parameters are taken unchanged from ``EntryGenericPlugin``;
    only the preview frame is specific to this plugin.
    """

    def __init__(self, entry, user=None):
        super().__init__(entry, user)

    def get_frame(self):
        """Return the inner frame markup built from the entry link."""
        h = OdyseeVideoHandler(self.entry.link)
        # NOTE(review): the template below carries no markup and no "{}"
        # placeholder in this copy, so the embed link passed to format() is
        # not inserted - the original markup appears to be missing; restore
        # it from the upstream file.
        return """

        """.format(
            h.get_link_embed()
        )

    def get_parameters(self):
        """Return the parameters of the generic plugin, unchanged."""
        old_params = super().get_parameters()
        return old_params

    def get_frame_html(self):
        """
        Return the preview HTML for the entry.

        When the entry is not appropriate for the user an age restriction
        notice is returned, otherwise the frame from ``get_frame``.
        """
        # NOTE(review): the wrapping markup of both templates appears to be
        # missing in this copy; only the text and placeholders are left.
        if not self.entry.is_user_appropriate(self.user):
            frame_text = """
This material is restricted for age {}
"""

            frame_text = frame_text.format(self.entry.age)

            return frame_text
        else:
            frame_text = """

                {}

"""

            frame_inner = self.get_frame()

            frame_text = frame_text.format(frame_inner)

            return frame_text

    def get_edit_menu_buttons(self):
        """Return the edit menu buttons of the generic plugin, unchanged."""
        buttons = super().get_edit_menu_buttons()
        return buttons

    def get_view_menu_buttons(self):
        """Return the view menu buttons of the generic plugin, unchanged."""
        buttons = super().get_view_menu_buttons()
        return buttons

    def get_advanced_menu_buttons(self):
        """Return the advanced menu buttons of the generic plugin, unchanged."""
        buttons = super().get_advanced_menu_buttons()
        return buttons


# --------------------------------------------------------------------------------
# /rsshistory/viewspkg/plugins/entrypreviewbuilder.py:
# --------------------------------------------------------------------------------
from ...pluginurl.urlhandler import UrlHandler

from .entryyoutubeplugin import EntryYouTubePlugin
from .entryodyseeplugin import EntryOdyseePlugin
from .entrygenericplugin import EntryGenericPlugin


class EntryPreviewBuilder(object):
    """
    Builds widget plugin.
    Only videos are displayed differently - we have preview for them

    TODO streamable?
    TODO pass request instead of user
    """

    @staticmethod
    def get(entry, user=None):
        """
        Return the preview plugin matching the type of ``entry.link``.

        The function takes no ``self``; it is declared static so that it can
        be called on the class (as before) and also on an instance.

        YouTube video links get ``EntryYouTubePlugin``, Odysee video links
        get ``EntryOdyseePlugin``, everything else ``EntryGenericPlugin``.
        """
        h = UrlHandler.get_type(entry.link)

        # exact type match: subclasses of the video handlers are not matched
        if type(h) is UrlHandler.youtube_video_handler:
            return EntryYouTubePlugin(entry, user)

        if type(h) is UrlHandler.odysee_video_handler:
            return EntryOdyseePlugin(entry, user)

        return EntryGenericPlugin(entry, user)


# --------------------------------------------------------------------------------
# /rsshistory/viewspkg/readmarkers.py:
# --------------------------------------------------------------------------------
from django.views import generic
from django.urls import reverse
from django.shortcuts import redirect
from django.http import HttpResponseRedirect, HttpResponse

from ..apps import LinkDatabase
from ..models import ConfigurationEntry
from ..models import ReadMarkers
from ..views import ViewPage
from ..controllers import (
    SourceDataController,
)


def set_read_marker(request):
    """
    Set the general read marker for the requesting user.

    Returns whatever ``ViewPage.set_access`` produced when it is not None,
    otherwise a redirect to the application index.
    """
    p = ViewPage(request)
    p.set_title("Sets read marker")
    data = p.set_access(ConfigurationEntry.ACCESS_TYPE_ALL)
    if data is not None:
        return data

    ReadMarkers.set_general(request.user)

    return redirect("{}:index".format(LinkDatabase.name))


def set_source_read_marker(request, pk):
    """
    Set the read marker of the source with id ``pk`` for the requesting user.

    Returns whatever ``ViewPage.set_access`` produced when it is not None,
    otherwise a redirect to the application index.
    """
    p = ViewPage(request)
    p.set_title("Sets read marker")
    data = p.set_access(ConfigurationEntry.ACCESS_TYPE_ALL)
    if data is not None:
        return data

    # objects.get() raises when no source has this id
    source = SourceDataController.objects.get(id=pk)

    ReadMarkers.set_source(request.user, source)

    return redirect("{}:index".format(LinkDatabase.name))


# --------------------------------------------------------------------------------
# /rsshistory/webtools/__init__.py:
# --------------------------------------------------------------------------------
"""
Similar project: https://pypi.org/project/abstract-webtools/
"""

from .webtools import *
from .webconfig import WebConfig

from .url import (
    Url,
    DomainCache,
    DomainCacheInfo,
    fetch_url,
    fetch_all_urls,
)

from .handlers import (
    HttpRequestBuilder,
    HttpPageHandler,
    YouTubeChannelHandler,
    YouTubeVideoHandler,
    YouTubeJsonHandler,
    OdyseeChannelHandler,
    OdyseeVideoHandler,
    RedditUrlHandler,
    GitHubUrlHandler,
    ReturnDislike,
    HackerNewsHandler,
    InternetArchive,
    FourChanChannelHandler,
    TwitterUrlHandler,
)

from .crawlers import (
    CrawlerInterface,
    RequestsCrawler,
    SeleniumDriver,
    SeleniumChromeHeadless,
    SeleniumChromeFull,
    SeleniumUndetected,
    ScriptCrawler,
    StealthRequestsCrawler,
)
from .crawlerscript import (
    ScriptCrawlerParser,
)

# --------------------------------------------------------------------------------
# /rsshistory/webtools/crawlers/__init__.py:
# --------------------------------------------------------------------------------
"""
Provides various crawling mechanisms, libraries that can crawl.
"""

from .crawlerinterface import CrawlerInterface, default_user_agent, default_headers
from .crawlers import *

# --------------------------------------------------------------------------------
# /rsshistory/webtools/crawlerscript.py:
# --------------------------------------------------------------------------------
import argparse
import json

from .webtools import PageRequestObject
from .crawlers import CrawlerInterface


class ScriptCrawlerParser(object):
    """
    Command line argument parser for crawler scripts.

    Headers can only be passed by input binary file
    """

    def parse(self):
        """Build the argument parser and store the parsed values in ``self.args``."""
        self.parser = argparse.ArgumentParser(description="Script crawler program")
        self.parser.add_argument("--url", help="URL to be requested")
        self.parser.add_argument(
            "--timeout", default=10, type=int, help="Timeout expressed in seconds"
        )
        # NOTE(review): the three options below have no ``action``; each one
        # expects a value and stores it as a string, so any non-empty value
        # (including "False") is truthy. Kept as is so that existing command
        # lines keep working.
        self.parser.add_argument("--ping", default=False, help="Ping only")
        self.parser.add_argument("--headers", default=False, help="Fetch headers only")
        self.parser.add_argument("--remote-server", help="Remote server")
        self.parser.add_argument("--proxy-address", help="Proxy address")
        self.parser.add_argument("--ssl-verify", default=False, help="SSL verify")

        # TODO implement
        self.parser.add_argument("--input-data", help="Input request file")
        self.parser.add_argument("-v", "--verbose", action="store_true", help="Verbose")

        self.parser.add_argument("-i", "--input", help="Requests binary file")
        self.parser.add_argument("-o", "--output-file", help="Response binary file")

        self.args = self.parser.parse_args()

    def is_valid(self):
        """
        Tell whether the parsed arguments are usable.

        Returns False, after printing the reason, when no URL was supplied or
        when the namespace has no ``output_file`` attribute; True otherwise.
        An ``output_file`` left at its ``None`` default is accepted, exactly
        as before.
        """
        # argparse always creates the attribute, so a missing --url shows up
        # as None; getattr covers both the absent and the None case.
        if getattr(self.args, "url", None) is None:
            print("Url file not in args")
            return False

        if not hasattr(self.args, "output_file"):
            print("Output file not in args")
            return False

        return True

    def get_request(self):
        """Return a ``PageRequestObject`` filled from url, timeout, ping and headers."""
        r = PageRequestObject(self.args.url)
        r.timeout_s = self.args.timeout
        r.ping = self.args.ping
        r.headers = self.args.headers

        return r


# --------------------------------------------------------------------------------
# /rsshistory/webtools/handlers/__init__.py:
# --------------------------------------------------------------------------------
"""
Provides special handling for individual pages
"""

from .handlerhttppage import *
from .handlerinterface import *

from .handlerchannelyoutube import *
from .handlervideoyoutube import *
from .handlerchannelodysee import *
from .handlervideoodysee import *

from .handlers import *

# --------------------------------------------------------------------------------
# /rsshistory/webtools/handlers/defaulturlhandler.py:
# --------------------------------------------------------------------------------
from utils.dateutils import DateUtils

from webtoolkit import DefaultContentPage

from .handlerhttppage import HttpPageHandler


class DefaultUrlHandler(HttpPageHandler):
    """
    This handler works as HTML page handler, mostly
    """

    def __init__(self, url=None, contents=None, settings=None, url_builder=None):
        """
        Initialise the base handler and derive ``self.code`` from ``url``.

        NOTE(review): ``contents`` is accepted but not used here and not
        forwarded to the base class - confirm that this is intended.
        """
        super().__init__(url, settings=settings, url_builder=url_builder)
        self.code = self.input2code(url)

    def get_page_url(self, url, crawler_name=None):
        """
        Build a URL object for ``url`` through ``self.url_builder``.

        The settings passed to the builder select ``HttpPageHandler`` as the
        handler class and, when ``crawler_name`` is given, carry it under
        the ``name`` key.
        """
        settings = {}
        settings["handler_class"] = HttpPageHandler

        if crawler_name:
            settings["name"] = crawler_name

        url = self.url_builder(url=url, settings=settings)
        return url


class DefaultChannelHandler(DefaultUrlHandler):
    """Channel handler with the same behaviour as ``DefaultUrlHandler``."""

    pass


# --------------------------------------------------------------------------------
# /screenshots/admin_view.png:
# --------------------------------------------------------------------------------
https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/screenshots/admin_view.png -------------------------------------------------------------------------------- /screenshots/backgroundjobs_view.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/screenshots/backgroundjobs_view.PNG -------------------------------------------------------------------------------- /screenshots/browser_list.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/screenshots/browser_list.PNG -------------------------------------------------------------------------------- /screenshots/browsers.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/screenshots/browsers.PNG -------------------------------------------------------------------------------- /screenshots/configuration_form.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/screenshots/configuration_form.PNG -------------------------------------------------------------------------------- /screenshots/dark_theme.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/screenshots/dark_theme.PNG -------------------------------------------------------------------------------- /screenshots/domains.PNG: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/screenshots/domains.PNG -------------------------------------------------------------------------------- /screenshots/entries_list_search_engine.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/screenshots/entries_list_search_engine.PNG -------------------------------------------------------------------------------- /screenshots/entries_list_standard.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/screenshots/entries_list_standard.PNG -------------------------------------------------------------------------------- /screenshots/entries_list_youtube.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/screenshots/entries_list_youtube.PNG -------------------------------------------------------------------------------- /screenshots/entry_details.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/screenshots/entry_details.PNG -------------------------------------------------------------------------------- /screenshots/entry_new.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/screenshots/entry_new.PNG -------------------------------------------------------------------------------- /screenshots/entry_new_simple.PNG: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/screenshots/entry_new_simple.PNG -------------------------------------------------------------------------------- /screenshots/index.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/screenshots/index.PNG -------------------------------------------------------------------------------- /screenshots/keywords_view.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/screenshots/keywords_view.PNG -------------------------------------------------------------------------------- /screenshots/logs_view.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/screenshots/logs_view.PNG -------------------------------------------------------------------------------- /screenshots/page_properties.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/screenshots/page_properties.PNG -------------------------------------------------------------------------------- /screenshots/search_engines.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/screenshots/search_engines.PNG -------------------------------------------------------------------------------- /screenshots/search_form.PNG: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/screenshots/search_form.PNG -------------------------------------------------------------------------------- /screenshots/server_status.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/screenshots/server_status.PNG -------------------------------------------------------------------------------- /screenshots/source_details.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/screenshots/source_details.PNG -------------------------------------------------------------------------------- /screenshots/source_list.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/screenshots/source_list.PNG -------------------------------------------------------------------------------- /screenshots/source_new.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/screenshots/source_new.PNG -------------------------------------------------------------------------------- /screenshots/tags_view.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rumca-js/Django-link-archive/e19c0728d465a4f56893d45224e8e18d5682b0a4/screenshots/tags_view.PNG -------------------------------------------------------------------------------- /screenshots/tools.PNG: 
def fix_path_for_os(file_path, max_path=260, limit=False):
    """
    Remove characters that are not allowed in file names and tidy white space.

    Windows for example does not allow certain characters in file names.

    @param file_path needs to be string
    @param max_path maximum path length accepted by the target system
                    (the default, 260, is the Windows MAX_PATH value)
    @param limit when True, the result is cut to at most max_path - 1 characters
    @return the cleaned (and optionally shortened) path
    """
    forbidden_chars = "><:|?*\"'"

    # one pass over the string instead of one replace() call per character
    file_path = file_path.translate(str.maketrans("", "", forbidden_chars))

    # collapse runs of white space into single spaces; this also trims both ends
    file_path = " ".join(file_path.split())

    if limit:
        # The slice used to be computed and thrown away, so nothing was ever
        # shortened. It is applied only on request, which keeps the behaviour
        # of existing callers (limit=False) exactly as it was.
        file_path = file_path[: max(max_path - 1, 0)].rstrip()

    return file_path
class ConfigurationEntryController(object):
    """
    Reads and writes ConfigurationEntry rows.

    `db` must provide get_session(); it is used only when no `session` was
    passed in. Whatever get_session() returns is called to open a session, so
    both `session` and the value returned by `db.get_session()` act as session
    factories rather than as open sessions.
    """

    def __init__(self, db, session=None):
        self.conn = db
        self.session = session

    def get_session(self):
        """
        Return the injected session factory, or ask the connection for one.
        """
        if self.session:
            return self.session
        return self.conn.get_session()

    def get(self):
        """
        Return the first ConfigurationEntry row, or whatever first() yields
        when the table is empty.
        """
        session_factory = self.get_session()
        with session_factory() as session:
            first_row = session.query(ConfigurationEntry).first()
        return first_row

    def add(self, config):
        """
        Store a new ConfigurationEntry built from the `config` mapping.

        Keys that do not match a ConfigurationEntry column are dropped.
        """
        # names of the columns declared on ConfigurationEntry
        known_columns = set()
        for column in ConfigurationEntry.__table__.columns:
            known_columns.add(column.name)

        # keep only the keys ConfigurationEntry can accept
        accepted = {}
        for key, value in config.items():
            if key in known_columns:
                accepted[key] = value

        entry = ConfigurationEntry(**accepted)

        session_factory = self.get_session()
        with session_factory() as session:
            session.add(entry)
            session.commit()
class Id3v2(object):
    """
    Wrapper around the external `id3v2` program, used to tag mp3 files.

    `data` is a mapping that must contain "title" and may contain "artist",
    "album" and "track".
    """

    def __init__(self, file_name, data=None, cwd=None, timeout_s=60 * 60):
        self.file_name = file_name
        self.timeout_s = timeout_s
        self.data = data
        self.cwd = cwd

    def _build_command(self):
        """
        Build the id3v2 argument list from self.data.

        Raises KeyError when "title" is missing, as the lookup always did.
        """
        command = ["id3v2", "-t", self.data["title"]]

        # The album used to be stored in the artist variable, so the artist
        # was overwritten and the album was always the text "None". Each
        # field now goes to its own option, and a missing field is left out
        # instead of being written to the file as the literal string "None".
        for option, key in (("-a", "artist"), ("-A", "album")):
            value = self.data[key] if key in self.data else None
            if value is not None:
                command += [option, str(value)]

        track = self.data["track"] if "track" in self.data else None
        if track:
            # was str(self._track): that attribute does not exist, so tagging
            # with a track number always raised AttributeError
            command += ["-T", str(track)]

        command.append(self.file_name)
        return command

    def tag(self):
        """
        Tag self.file_name using self.data; only ".mp3" files are accepted.

        Returns None. A file with another extension is reported through
        logging and left untouched.
        """
        if os.path.splitext(self.file_name)[1] != ".mp3":
            logging.error("Cannot tag file, not an mp3 file")
            return

        subprocess.run(
            self._build_command(),
            cwd=self.cwd,
            timeout=self.timeout_s,
        )

    @staticmethod
    def validate():
        """
        Return True when the `id3v2` program could be started.
        """
        try:
            subprocess.run(
                ["id3v2"],
                stdout=subprocess.PIPE,
                timeout=10,
            )
        except (OSError, subprocess.SubprocessError):
            # OSError: program missing or not executable;
            # SubprocessError: covers the timeout.
            # A bare except would also swallow KeyboardInterrupt.
            return False
        return True
RepositoryInterface(object): 6 | def __init__( 7 | self, export_data, timeout_s=60 * 60, operating_dir=None, data_source_dir=None 8 | ): 9 | self.export_data = export_data 10 | self.timeout_s = timeout_s 11 | if not operating_dir: 12 | self.operating_dir = self.export_data.local_path 13 | else: 14 | self.operating_dir = operating_dir 15 | 16 | self.data_source_directory = data_source_dir 17 | 18 | def get_operating_dir(self): 19 | """ 20 | repository path 21 | """ 22 | if self.operating_dir: 23 | return Path(self.operating_dir) 24 | 25 | def get_local_dir(self): 26 | """ 27 | local path within repository path (where operations on repository need to be made) 28 | """ 29 | return self.get_operating_dir() 30 | 31 | def get_data_source_directory(self): 32 | """ 33 | place where data are stored, needs to be copied to repository and pushed 34 | """ 35 | if self.data_source_directory: 36 | return Path(self.data_source_directory) 37 | 38 | def push_to_repo(self, commit_message): 39 | # just copy from write directory, to local directory 40 | self.copy_tree() 41 | 42 | def clear_operating_directory(self): 43 | dir = self.get_operating_dir() 44 | if dir.exists(): 45 | shutil.rmtree(dir) 46 | 47 | def copy_tree(self): 48 | expected_dir = self.get_local_dir() 49 | data_dir = self.data_source_directory 50 | 51 | if expected_dir != self.data_source_directory: 52 | shutil.copytree(data_dir, expected_dir, dirs_exist_ok=True) 53 | -------------------------------------------------------------------------------- /utils/serializers/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Data serializers. 
class InternetArchive(InternetArchiveInterface):
    """
    Builds web.archive.org links for the wrapped url.
    """

    def __init__(self, url):
        super().__init__(url)

    def get_archive_url(self, time=None):
        """
        Return the web.archive.org address of self.url for the given day.

        @param time object providing strftime(); when omitted (or falsy) the
                    current UTC time from DateUtils is used
        @return the archive link, or None when no time could be established
        """
        moment = time or DateUtils.get_datetime_now_utc()
        if not moment:
            return None

        day_stamp = moment.strftime("%Y%m%d")
        # NOTE(review): "110000" presumably is the hhmmss part of the archive
        # timestamp - confirm against the Wayback Machine URL format
        prefix = "https://web.archive.org/web/{}110000*/".format(day_stamp)
        return prefix + self.url
class ReadingList(object):
    """
    Parses reading list CSV text; the first line is the header row.
    """

    def __init__(self, contents=None):
        self.contents = contents

    def get_links(self):
        """
        Return the "url" column of every row, in file order.

        Raises KeyError when the CSV has no "url" column.
        """
        reader = csv.DictReader(io.StringIO(self.contents))
        return [row["url"] for row in reader]

    def get_entries(self):
        """
        Return every row as a mapping of column name to value.

        Malformed CSV terminates the program through sys.exit with a message
        that names the offending line.
        """
        reader = csv.DictReader(io.StringIO(self.contents))
        try:
            return list(reader)
        except csv.Error as e:
            # The message used to reference an undefined `filename`, so this
            # handler raised NameError instead of reporting the CSV problem.
            # This class only ever sees text, there is no file name to show.
            # NOTE(review): exiting the interpreter from a parser is drastic;
            # consider raising to the caller instead.
            sys.exit("reading list, line {}: {}".format(reader.line_num, e))
class GoogleTranslate(TranslatePage):
    """
    Builds the translate.goog address of a page (target language: English).
    """

    def __init__(self, url):
        super().__init__(url)

    def get_translate_url(self):
        """
        Return the translate.goog link for self.url.

        The list returned by UrlLocation.parse_url() is used as follows:
        item 0 is compared with "http", item 2 is rewritten into the
        translate.goog host label, items 1, 3 and 4 are copied into the result.
        NOTE(review): the exact content of each item is defined by webtoolkit -
        confirm there.
        """
        pieces = UrlLocation(self.url).parse_url()

        # existing dashes are doubled first, then dots become dashes
        pieces[2] = pieces[2].replace("-", "--").replace(".", "-")

        query = "?_x_tr_sl=auto&_x_tr_tl=en&_x_tr_hl=en&_x_tr_pto=wapp"
        if pieces[0] == "http":
            # plain http pages carry the original scheme as an extra parameter
            query = (
                "?_x_tr_sch=http&_x_tr_sl=auto&_x_tr_tl=en&_x_tr_hl=en&_x_tr_pto=wapp"
            )

        host = "https" + pieces[1] + pieces[2] + ".translate.goog"
        count = len(pieces)

        if count > 4:
            # the page's own query is appended to ours, so its "?" becomes "&"
            return host + pieces[3] + query + pieces[4].replace("?", "&")
        if count > 3:
            return host + pieces[3] + query
        if count > 2:
            return host + "/" + query
        return None
class W3CValidator(Validator):
    """
    Produces the validator.w3.org "nu" checker link for a page.
    """

    def __init__(self, url):
        super().__init__(url)

    def get_validate_url(self):
        """
        Return the checker address with self.url passed, encoded, as `doc`.
        """
        encoded_page = self.encode_url(self.url)
        service = "https://validator.w3.org/nu/?doc="
        return service + encoded_page