├── .flake8 ├── .gitattributes ├── .github └── workflows │ └── python.yml ├── .gitignore ├── .pre-commit-config.yaml ├── LICENSE ├── README.md ├── archive.sh ├── channels.json ├── configs ├── annotator_config.json ├── client_config.json ├── clusterer_config.json ├── daemon_config.json ├── mongo_config.json ├── ranker_config.json ├── renderer_config.json └── test_ranker_config.json ├── crawl.sh ├── crawler ├── __init__.py ├── fetch_times.json ├── pipelines.py ├── settings.py └── spiders │ ├── __init__.py │ └── telegram.py ├── crowd ├── aggregate.py ├── data │ ├── exam.tsv │ ├── honey.tsv │ └── training.tsv ├── download.py ├── sample.py ├── upload.py └── util.py ├── docker-compose.yml ├── download_models.sh ├── nyan ├── __init__.py ├── annotator.py ├── channels.py ├── classifier.py ├── client.py ├── clip.py ├── clusterer.py ├── clusters.py ├── daemon.py ├── document.py ├── embedder.py ├── fasttext_clf.py ├── image.py ├── mongo.py ├── openai.py ├── prompts │ ├── category.txt │ ├── diff.txt │ └── topics.txt ├── ranker.py ├── renderer.py ├── send.py ├── templates │ ├── cluster.html │ └── topics.html ├── text.py ├── title.py ├── tokenizer.py ├── topics.py └── util.py ├── requirements.txt ├── scrapy.cfg ├── scripts ├── agency2vec.py ├── annotate_categories.py ├── annotation_bot.py ├── clean_docs.py ├── clusters_to_jsonl.py ├── convert_to_tsv.py ├── eval_clf.py ├── eval_embeddings.py ├── eval_embeddings_toloka.py ├── extract_headlines.py ├── extract_replies_dataset.py ├── filter_documents.py ├── filter_posted_clusers.py ├── list_channels.py ├── map_lenta.py ├── mongo_to_jsonl.py ├── rss.py ├── single_cluster_annotation_bot.py └── train_clf.py ├── send.sh └── tests ├── __init__.py ├── canonize.py ├── conftest.py ├── data ├── clip.jsonl ├── input_docs.jsonl ├── output_clusters.jsonl └── output_docs.jsonl ├── test_annotator.py ├── test_clip.py ├── test_clusterer.py └── test_lang_detector.py /.flake8: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/.flake8 -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/.gitattributes -------------------------------------------------------------------------------- /.github/workflows/python.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/.github/workflows/python.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/.gitignore -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/.pre-commit-config.yaml -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/README.md -------------------------------------------------------------------------------- /archive.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/archive.sh -------------------------------------------------------------------------------- /channels.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/channels.json -------------------------------------------------------------------------------- /configs/annotator_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/configs/annotator_config.json -------------------------------------------------------------------------------- /configs/client_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/configs/client_config.json -------------------------------------------------------------------------------- /configs/clusterer_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/configs/clusterer_config.json -------------------------------------------------------------------------------- /configs/daemon_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/configs/daemon_config.json -------------------------------------------------------------------------------- /configs/mongo_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/configs/mongo_config.json -------------------------------------------------------------------------------- /configs/ranker_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/configs/ranker_config.json -------------------------------------------------------------------------------- /configs/renderer_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/configs/renderer_config.json -------------------------------------------------------------------------------- /configs/test_ranker_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/configs/test_ranker_config.json -------------------------------------------------------------------------------- /crawl.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/crawl.sh -------------------------------------------------------------------------------- /crawler/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /crawler/fetch_times.json: -------------------------------------------------------------------------------- 1 | {} 2 | -------------------------------------------------------------------------------- /crawler/pipelines.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/crawler/pipelines.py -------------------------------------------------------------------------------- /crawler/settings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/crawler/settings.py -------------------------------------------------------------------------------- /crawler/spiders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/crawler/spiders/__init__.py -------------------------------------------------------------------------------- /crawler/spiders/telegram.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/crawler/spiders/telegram.py -------------------------------------------------------------------------------- /crowd/aggregate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/crowd/aggregate.py -------------------------------------------------------------------------------- /crowd/data/exam.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/crowd/data/exam.tsv -------------------------------------------------------------------------------- /crowd/data/honey.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/crowd/data/honey.tsv -------------------------------------------------------------------------------- /crowd/data/training.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/crowd/data/training.tsv -------------------------------------------------------------------------------- /crowd/download.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/crowd/download.py -------------------------------------------------------------------------------- /crowd/sample.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/crowd/sample.py -------------------------------------------------------------------------------- /crowd/upload.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/crowd/upload.py -------------------------------------------------------------------------------- /crowd/util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/crowd/util.py -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/docker-compose.yml -------------------------------------------------------------------------------- /download_models.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/download_models.sh -------------------------------------------------------------------------------- /nyan/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /nyan/annotator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/nyan/annotator.py -------------------------------------------------------------------------------- /nyan/channels.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/nyan/channels.py -------------------------------------------------------------------------------- /nyan/classifier.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/nyan/classifier.py -------------------------------------------------------------------------------- /nyan/client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/nyan/client.py -------------------------------------------------------------------------------- /nyan/clip.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/nyan/clip.py -------------------------------------------------------------------------------- /nyan/clusterer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/nyan/clusterer.py -------------------------------------------------------------------------------- /nyan/clusters.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/nyan/clusters.py -------------------------------------------------------------------------------- /nyan/daemon.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/nyan/daemon.py -------------------------------------------------------------------------------- /nyan/document.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/nyan/document.py -------------------------------------------------------------------------------- /nyan/embedder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/nyan/embedder.py -------------------------------------------------------------------------------- /nyan/fasttext_clf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/nyan/fasttext_clf.py -------------------------------------------------------------------------------- /nyan/image.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/nyan/image.py -------------------------------------------------------------------------------- /nyan/mongo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/nyan/mongo.py -------------------------------------------------------------------------------- /nyan/openai.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/nyan/openai.py -------------------------------------------------------------------------------- /nyan/prompts/category.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/nyan/prompts/category.txt -------------------------------------------------------------------------------- /nyan/prompts/diff.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/nyan/prompts/diff.txt -------------------------------------------------------------------------------- /nyan/prompts/topics.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/nyan/prompts/topics.txt -------------------------------------------------------------------------------- /nyan/ranker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/nyan/ranker.py -------------------------------------------------------------------------------- /nyan/renderer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/nyan/renderer.py -------------------------------------------------------------------------------- /nyan/send.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/nyan/send.py -------------------------------------------------------------------------------- /nyan/templates/cluster.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/nyan/templates/cluster.html -------------------------------------------------------------------------------- /nyan/templates/topics.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/nyan/templates/topics.html -------------------------------------------------------------------------------- /nyan/text.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/nyan/text.py -------------------------------------------------------------------------------- /nyan/title.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/nyan/title.py -------------------------------------------------------------------------------- /nyan/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/nyan/tokenizer.py -------------------------------------------------------------------------------- /nyan/topics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/nyan/topics.py -------------------------------------------------------------------------------- /nyan/util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/nyan/util.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/requirements.txt -------------------------------------------------------------------------------- /scrapy.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/scrapy.cfg -------------------------------------------------------------------------------- /scripts/agency2vec.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/scripts/agency2vec.py -------------------------------------------------------------------------------- /scripts/annotate_categories.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/scripts/annotate_categories.py -------------------------------------------------------------------------------- /scripts/annotation_bot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/scripts/annotation_bot.py -------------------------------------------------------------------------------- /scripts/clean_docs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/scripts/clean_docs.py -------------------------------------------------------------------------------- /scripts/clusters_to_jsonl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/scripts/clusters_to_jsonl.py -------------------------------------------------------------------------------- /scripts/convert_to_tsv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/scripts/convert_to_tsv.py -------------------------------------------------------------------------------- /scripts/eval_clf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/scripts/eval_clf.py -------------------------------------------------------------------------------- /scripts/eval_embeddings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/scripts/eval_embeddings.py -------------------------------------------------------------------------------- /scripts/eval_embeddings_toloka.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/scripts/eval_embeddings_toloka.py -------------------------------------------------------------------------------- /scripts/extract_headlines.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/scripts/extract_headlines.py -------------------------------------------------------------------------------- /scripts/extract_replies_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/scripts/extract_replies_dataset.py -------------------------------------------------------------------------------- /scripts/filter_documents.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/scripts/filter_documents.py -------------------------------------------------------------------------------- /scripts/filter_posted_clusers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/scripts/filter_posted_clusers.py -------------------------------------------------------------------------------- /scripts/list_channels.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/scripts/list_channels.py -------------------------------------------------------------------------------- /scripts/map_lenta.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/scripts/map_lenta.py -------------------------------------------------------------------------------- /scripts/mongo_to_jsonl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/scripts/mongo_to_jsonl.py -------------------------------------------------------------------------------- /scripts/rss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/scripts/rss.py -------------------------------------------------------------------------------- /scripts/single_cluster_annotation_bot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/scripts/single_cluster_annotation_bot.py -------------------------------------------------------------------------------- /scripts/train_clf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/scripts/train_clf.py -------------------------------------------------------------------------------- /send.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/send.sh -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/canonize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/tests/canonize.py -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/tests/conftest.py -------------------------------------------------------------------------------- /tests/data/clip.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/tests/data/clip.jsonl -------------------------------------------------------------------------------- /tests/data/input_docs.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/tests/data/input_docs.jsonl -------------------------------------------------------------------------------- /tests/data/output_clusters.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/tests/data/output_clusters.jsonl -------------------------------------------------------------------------------- /tests/data/output_docs.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/tests/data/output_docs.jsonl -------------------------------------------------------------------------------- /tests/test_annotator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/tests/test_annotator.py -------------------------------------------------------------------------------- /tests/test_clip.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/tests/test_clip.py -------------------------------------------------------------------------------- /tests/test_clusterer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/tests/test_clusterer.py -------------------------------------------------------------------------------- /tests/test_lang_detector.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NyanNyanovich/nyan/HEAD/tests/test_lang_detector.py --------------------------------------------------------------------------------