├── tgbot
    ├── app
    │   ├── __init__.py
    │   ├── aiosqlite_wrapper.py
    │   └── exceptions.py
    ├── views
    │   ├── __init__.py
    │   └── telegram
    │   │   ├── __init__.py
    │   │   ├── document_list_widget.py
    │   │   ├── common.py
    │   │   └── progress_bar.py
    ├── widgets
    │   └── __init__.py
    ├── configs
    │   ├── development.yaml
    │   ├── __init__.py
    │   ├── logging.yaml
    │   └── base.yaml
    ├── .gitignore
    ├── promotions
    │   ├── __init__.py
    │   └── promotions.yaml
    ├── handlers
    │   ├── stop.py
    │   ├── noop.py
    │   ├── aboutus.py
    │   ├── librarian.py
    │   ├── help.py
    │   ├── howtohelp.py
    │   ├── shortlink.py
    │   ├── close.py
    │   ├── report.py
    │   ├── roll.py
    │   ├── q.py
    │   ├── view.py
    │   ├── start.py
    │   ├── submit.py
    │   ├── vote.py
    │   ├── mlt.py
    │   ├── cybrex.py
    │   └── riot.py
    ├── translations
    │   └── __init__.py
    ├── requirements.txt
    ├── Dockerfile
    ├── main.py
    ├── README.md
    └── markdownifytg.py
├── web
    ├── .prettierrc.json
    ├── .eslintignore
    ├── service-worker.js
    ├── public
    │   ├── favicon.ico
    │   ├── favicon-dark.png
    │   ├── mstile-70x70.png
    │   ├── default-cover.jpg
    │   ├── favicon-16x16.png
    │   ├── favicon-32x32.png
    │   ├── favicon-light.png
    │   ├── mstile-144x144.png
    │   ├── mstile-150x150.png
    │   ├── mstile-310x150.png
    │   ├── mstile-310x310.png
    │   ├── apple-touch-icon.png
    │   ├── android-chrome-192x192.png
    │   ├── android-chrome-512x512.png
    │   ├── android-chrome-maskable-192x192.png
    │   ├── android-chrome-maskable-512x512.png
    │   ├── browserconfig.xml
    │   ├── sitemap.xml
    │   ├── site.webmanifest
    │   ├── favicon-black.svg
    │   ├── favicon.svg
    │   └── safari-pinned-tab.svg
    ├── src
    │   ├── services
    │   │   ├── search
    │   │   │   ├── index.ts
    │   │   │   ├── search-service.ts
    │   │   │   └── query-processor.ts
    │   │   ├── index.ts
    │   │   └── user-service.ts
    │   ├── assets
    │   │   └── origin.jpg
    │   ├── views
    │   │   ├── HowToSearchView.vue
    │   │   ├── InstallIpfsView.vue
    │   │   ├── DoomsdayView.vue
    │   │   ├── IntroView.vue
    │   │   ├── StcHubApiView.vue
    │   │   ├── StcBoxView.vue
    │   │   ├── DonateView.vue
    │   │   ├── DocumentView.vue
    │   │   ├── Reader.vue
    │   │   └── BookmarksView.vue
    │   ├── components
    │   │   ├── TagsList.vue
    │   │   ├── LoadingSpinner.vue
    │   │   ├── QrCode.vue
    │   │   ├── ReferencesList.vue
    │   │   ├── SearchList.vue
    │   │   ├── ConnectivityIssues.vue
    │   │   ├── DocumentButtons.vue
    │   │   ├── DjvuReader.vue
    │   │   ├── EpubReader.vue
    │   │   ├── DocumentSnippet.vue
    │   │   ├── download-progress.ts
    │   │   └── PdfReader.vue
    │   ├── main.ts
    │   ├── App.vue
    │   ├── database.ts
    │   ├── router
    │   │   └── index.ts
    │   ├── utils.ts
    │   └── scss
    │   │   └── styles.scss
    ├── summa-config.json
    ├── tsconfig.config.json
    ├── .gitignore
    ├── env.d.ts
    ├── vite-sw.config.ts
    ├── publi.sh
    ├── tsconfig.json
    ├── README.md
    ├── index.html
    ├── .eslintrc.js
    ├── vite.config.ts
    └── package.json
├── cybrex
    ├── cybrex
    │   ├── __init__.py
    │   ├── chains
    │   │   ├── base.py
    │   │   ├── __init__.py
    │   │   └── map_reduce.py
    │   ├── prompts
    │   │   └── __init__.py
    │   ├── vector_storage
    │   │   ├── __init__.py
    │   │   └── base.py
    │   ├── exceptions.py
    │   ├── data_source
    │   │   ├── base.py
    │   │   └── geck_data_source.py
    │   ├── utils.py
    │   └── llm_manager.py
    ├── MANIFEST.in
    ├── .gitignore
    ├── .isort.cfg
    ├── .flake8
    ├── requirements.txt
    ├── pyproject.toml
    ├── examples
    │   ├── on-the-fly-translation.ipynb
    │   └── analyse-references.ipynb
    └── README.md
├── geck
    ├── stc_geck
    │   ├── __init__.py
    │   ├── exceptions.py
    │   └── utils.py
    ├── MANIFEST.in
    ├── .gitignore
    ├── .isort.cfg
    ├── .flake8
    ├── requirements.txt
    └── pyproject.toml
├── library
    ├── sciparse
    │   ├── __init__.py
    │   ├── models
    │   │   ├── .gitignore
    │   │   └── lid.176.ftz
    │   ├── exceptions.py
    │   ├── language_detect.py
    │   └── cli.py
    ├── telegram
    │   ├── README.md
    │   ├── __init__.py
    │   ├── session_backend
    │   │   ├── __init__.py
    │   │   └── core_postgres.py
    │   ├── common.py
    │   ├── promotioner.py
    │   └── utils.py
    ├── .gitignore
    ├── user_manager
    │   ├── __init__.py
    │   └── user_manager.py
    └── textutils
    │   ├── __init__.py
    │   ├── html_processing.py
    │   └── utils.py
├── .flake8
├── .gitignore
├── .isort.cfg
├── .env.light
└── docker-compose.light.yml


/tgbot/app/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tgbot/views/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/web/.prettierrc.json:
--------------------------------------------------------------------------------
1 | {}


--------------------------------------------------------------------------------
/cybrex/cybrex/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/cybrex/cybrex/chains/base.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/geck/stc_geck/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/library/sciparse/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/library/telegram/README.md:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/library/telegram/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tgbot/widgets/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/cybrex/cybrex/chains/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/cybrex/cybrex/prompts/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tgbot/views/telegram/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tgbot/configs/development.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | 


--------------------------------------------------------------------------------
/cybrex/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include requirements.txt


--------------------------------------------------------------------------------
/cybrex/cybrex/vector_storage/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/geck/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include requirements.txt


--------------------------------------------------------------------------------
/library/sciparse/models/.gitignore:
--------------------------------------------------------------------------------
1 | lid.176.bin


--------------------------------------------------------------------------------
/cybrex/.gitignore:
--------------------------------------------------------------------------------
1 | *.egg-info
2 | __pycache__
3 | dist


--------------------------------------------------------------------------------
/geck/.gitignore:
--------------------------------------------------------------------------------
1 | *.egg-info
2 | __pycache__
3 | dist


--------------------------------------------------------------------------------
/tgbot/.gitignore:
--------------------------------------------------------------------------------
1 | bots.db
2 | configs/production.yaml


--------------------------------------------------------------------------------
/.flake8:
--------------------------------------------------------------------------------
1 | [flake8]
2 | exclude = venv/*
3 | max-line-length = 160


--------------------------------------------------------------------------------
/library/.gitignore:
--------------------------------------------------------------------------------
1 | actions
2 | integral
3 | pdftools
4 | siteparsers


--------------------------------------------------------------------------------
/web/.eslintignore:
--------------------------------------------------------------------------------
1 | .eslintrc.js
2 | public/*
3 | service-worker.js


--------------------------------------------------------------------------------
/web/service-worker.js:
--------------------------------------------------------------------------------
1 | node_modules/summa-wasm/dist/service-worker.js


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .env
2 | fabrica
3 | infra
4 | venv
5 | docker-compose.yml
6 | __pycache__
7 | 


--------------------------------------------------------------------------------
/web/public/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nexus-stc/stc/HEAD/web/public/favicon.ico


--------------------------------------------------------------------------------
/web/src/services/search/index.ts:
--------------------------------------------------------------------------------
1 | export {IpfsSearchService} from './ipfs-search-service'
2 | 


--------------------------------------------------------------------------------
/web/public/favicon-dark.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nexus-stc/stc/HEAD/web/public/favicon-dark.png


--------------------------------------------------------------------------------
/web/public/mstile-70x70.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nexus-stc/stc/HEAD/web/public/mstile-70x70.png


--------------------------------------------------------------------------------
/web/src/assets/origin.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nexus-stc/stc/HEAD/web/src/assets/origin.jpg


--------------------------------------------------------------------------------
/web/public/default-cover.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nexus-stc/stc/HEAD/web/public/default-cover.jpg


--------------------------------------------------------------------------------
/web/public/favicon-16x16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nexus-stc/stc/HEAD/web/public/favicon-16x16.png


--------------------------------------------------------------------------------
/web/public/favicon-32x32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nexus-stc/stc/HEAD/web/public/favicon-32x32.png


--------------------------------------------------------------------------------
/web/public/favicon-light.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nexus-stc/stc/HEAD/web/public/favicon-light.png


--------------------------------------------------------------------------------
/web/public/mstile-144x144.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nexus-stc/stc/HEAD/web/public/mstile-144x144.png


--------------------------------------------------------------------------------
/web/public/mstile-150x150.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nexus-stc/stc/HEAD/web/public/mstile-150x150.png


--------------------------------------------------------------------------------
/web/public/mstile-310x150.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nexus-stc/stc/HEAD/web/public/mstile-310x150.png


--------------------------------------------------------------------------------
/web/public/mstile-310x310.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nexus-stc/stc/HEAD/web/public/mstile-310x310.png


--------------------------------------------------------------------------------
/library/user_manager/__init__.py:
--------------------------------------------------------------------------------
1 | from .user_manager import UserManager
2 | 
3 | __all__ = ['UserManager']
4 | 


--------------------------------------------------------------------------------
/web/public/apple-touch-icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nexus-stc/stc/HEAD/web/public/apple-touch-icon.png


--------------------------------------------------------------------------------
/library/sciparse/models/lid.176.ftz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nexus-stc/stc/HEAD/library/sciparse/models/lid.176.ftz


--------------------------------------------------------------------------------
/web/public/android-chrome-192x192.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nexus-stc/stc/HEAD/web/public/android-chrome-192x192.png


--------------------------------------------------------------------------------
/web/public/android-chrome-512x512.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nexus-stc/stc/HEAD/web/public/android-chrome-512x512.png


--------------------------------------------------------------------------------
/web/src/services/index.ts:
--------------------------------------------------------------------------------
1 | export { IpfsSearchService } from './search'
2 | export { UserService } from './user-service'
3 | 


--------------------------------------------------------------------------------
/geck/.isort.cfg:
--------------------------------------------------------------------------------
1 | [settings]
2 | skip_glob=**/venv/**
3 | include_trailing_comma=True
4 | multi_line_output=3
5 | force_grid_wrap=2
6 | 


--------------------------------------------------------------------------------
/cybrex/.isort.cfg:
--------------------------------------------------------------------------------
1 | [settings]
2 | skip_glob=**/venv/**
3 | include_trailing_comma=True
4 | multi_line_output=3
5 | force_grid_wrap=2
6 | 


--------------------------------------------------------------------------------
/.isort.cfg:
--------------------------------------------------------------------------------
1 | [settings]
2 | skip_glob=**/venv/**
3 | include_trailing_comma=True
4 | multi_line_output=3
5 | force_grid_wrap=2
6 | line_length=120


--------------------------------------------------------------------------------
/library/sciparse/exceptions.py:
--------------------------------------------------------------------------------
1 | from aiobaseclient.exceptions import BadRequestError
2 | 
3 | __all__ = [
4 |     'BadRequestError',
5 | ]
6 | 


--------------------------------------------------------------------------------
/library/telegram/session_backend/__init__.py:
--------------------------------------------------------------------------------
1 | from .sqlalchemy import AlchemySessionContainer
2 | 
3 | __all__ = ['AlchemySessionContainer']
4 | 


--------------------------------------------------------------------------------
/web/public/android-chrome-maskable-192x192.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nexus-stc/stc/HEAD/web/public/android-chrome-maskable-192x192.png


--------------------------------------------------------------------------------
/web/public/android-chrome-maskable-512x512.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nexus-stc/stc/HEAD/web/public/android-chrome-maskable-512x512.png


--------------------------------------------------------------------------------
/cybrex/cybrex/exceptions.py:
--------------------------------------------------------------------------------
1 | from izihawa_utils.exceptions import BaseError
2 | 
3 | 
4 | class QdrantStorageNotAvailableError(BaseError):
5 |     pass
6 | 


--------------------------------------------------------------------------------
/cybrex/.flake8:
--------------------------------------------------------------------------------
 1 | [flake8]
 2 | exclude =
 3 |     .git,
 4 |     __pycache__,
 5 |     venv,
 6 |     build,
 7 |     dist,
 8 | ignore = I, W503
 9 | max-line-length = 140
10 | 


--------------------------------------------------------------------------------
/geck/.flake8:
--------------------------------------------------------------------------------
 1 | [flake8]
 2 | exclude =
 3 |     .git,
 4 |     __pycache__,
 5 |     venv,
 6 |     build,
 7 |     dist,
 8 | ignore = I, W503
 9 | max-line-length = 140
10 | 


--------------------------------------------------------------------------------
/web/summa-config.json:
--------------------------------------------------------------------------------
1 | {
2 |   "index": "bafyb4iadbza7ckc3djc2k5lfaorwaufcjurzxzkjsj5e7qt2wrguqs7ywm",
3 |   "ipfs_api_multiaddr": "/ip4/10.1.2.3/tcp/5001",
4 |   "ipfs_http_base_url": "http://10.1.2.3:8080"
5 | }
6 | 


--------------------------------------------------------------------------------
/tgbot/promotions/__init__.py:
--------------------------------------------------------------------------------
1 | from izihawa_configurator import Configurator
2 | 
3 | 
4 | def get_promotions():
5 |     return Configurator(['tgbot/promotions/promotions.yaml'])['promotions']
6 | 
7 | 
8 | promotions = get_promotions()
9 | 


--------------------------------------------------------------------------------
/web/src/services/user-service.ts:
--------------------------------------------------------------------------------
 1 | /** Keeps an in-memory list of the items passed to `like`. */
 2 | export class UserService {
 3 |   /** Liked items in the order they were added; starts out empty. */
 4 |   liked_items: string[] = []
 5 | 
 6 |   /** Append `item` to `liked_items`; repeated items are stored again. */
 7 |   like (item: string) {
 8 |     this.liked_items.push(item)
 9 |   }
10 | }
11 | 


--------------------------------------------------------------------------------
/geck/requirements.txt:
--------------------------------------------------------------------------------
 1 | aiohttp>=3.8.5
 2 | aiokit>=1.2.3
 3 | aiosumma>=2.47.1
 4 | humanfriendly>=10.0
 5 | ipfs-hamt-directory-py>=0.1.1
 6 | izihawa-ipfs-api>=1.0.7
 7 | izihawa-utils>=1.1.3
 8 | multidict>=6.0.4
 9 | summa-embed>=0.20.2
10 | termcolor>=2.3.0
11 | fire>=0.5.0


--------------------------------------------------------------------------------
/web/public/browserconfig.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="utf-8"?>
 2 | <browserconfig>
 3 |     <msapplication>
 4 |         <tile>
 5 |             <square150x150logo src="/mstile-150x150.png"/>
 6 |             <TileColor>#ffc40d</TileColor>
 7 |         </tile>
 8 |     </msapplication>
 9 | </browserconfig>
10 | 


--------------------------------------------------------------------------------
/cybrex/cybrex/vector_storage/base.py:
--------------------------------------------------------------------------------
 1 | from typing import (
 2 |     Iterable,
 3 |     List,
 4 |     Optional,
 5 |     Tuple,
 6 | )
 7 | 
 8 | 
 9 | class BaseVectorStorage:
10 |     def query(self, query_embedding: List[float], n_chunks: int, field_values: Optional[Iterable[Tuple[str, str]]] = None):
11 |         raise NotImplementedError()
12 | 


--------------------------------------------------------------------------------
/web/public/sitemap.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
 3 |    <url>
 4 |       <loc>https://libstc.cc/</loc>
 5 |       <lastmod>2023-04-12</lastmod>
 6 |    </url>
 7 |    <url>
 8 |       <loc>https://libstc.cc/#/about</loc>
 9 |       <lastmod>2023-04-12</lastmod>
10 |    </url>
11 | </urlset>
12 | 


--------------------------------------------------------------------------------
/geck/stc_geck/exceptions.py:
--------------------------------------------------------------------------------
 1 | from izihawa_utils.exceptions import BaseError
 2 | 
 3 | 
 4 | class IpfsConnectionError(BaseError):
 5 |     pass
 6 | 
 7 | 
 8 | class ItemNotFound(BaseError):  # Error that carries the `query` it was raised for.
 9 |     def __init__(self, query):  # NOTE(review): BaseError.__init__ is not called -- confirm the base needs no setup.
10 |         self.query = query
11 | 
12 | 
13 | class CidNotFound(BaseError):  # Error that carries the `query` it was raised for.
14 |     def __init__(self, query):  # NOTE(review): BaseError.__init__ is not called -- confirm the base needs no setup.
15 |         self.query = query
16 | 


--------------------------------------------------------------------------------
/tgbot/configs/__init__.py:
--------------------------------------------------------------------------------
 1 | from izihawa_configurator import Configurator
 2 | from izihawa_utils import env
 3 | 
 4 | 
 5 | def get_config():
 6 |     return Configurator([
 7 |         'tgbot/configs/base.yaml',
 8 |         'tgbot/configs/%s.yaml?' % env.type,
 9 |         'tgbot/configs/logging.yaml',
10 |     ], env_prefix='STC_TGBOT')
11 | 
12 | 
13 | config = get_config()
14 | 


--------------------------------------------------------------------------------
/library/telegram/common.py:
--------------------------------------------------------------------------------
 1 | from telethon import Button
 2 | 
 3 | 
 4 | def close_button(session_id: str = None):
 5 |     if session_id:
 6 |         return Button.inline(
 7 |             text='✖️',
 8 |             data=f'/close_{session_id}',
 9 |         )
10 |     else:
11 |         return Button.inline(
12 |             text='✖️',
13 |             data='/close',
14 |         )
15 | 


--------------------------------------------------------------------------------
/web/tsconfig.config.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "extends": "@tsconfig/node18/tsconfig.json",
 3 |   "include": [
 4 |     "summa-config.ts",
 5 |     "vite.config.ts",
 6 |     "vite-sw.config.ts",
 7 |     "vitest.config.ts",
 8 |     "cypress.config.*"
 9 |   ],
10 |   "compilerOptions": {
11 |     "composite": true,
12 |     "moduleResolution": "Node",
13 |     "resolveJsonModule": true,
14 |     "types": ["node"]
15 |   }
16 | }
17 | 


--------------------------------------------------------------------------------
/tgbot/handlers/stop.py:
--------------------------------------------------------------------------------
 1 | from telethon import events
 2 | 
 3 | from library.telegram.base import RequestContext
 4 | 
 5 | from .base import BaseHandler
 6 | 
 7 | 
 8 | class StopHandler(BaseHandler):  # Handles incoming messages whose text matches `^/stop$`.
 9 |     filter = events.NewMessage(incoming=True, pattern='^/stop$')  # NOTE(review): handler annotates its event as ChatAction -- confirm.
10 | 
11 |     async def handler(self, event: events.ChatAction, request_context: RequestContext):
12 |         request_context.statbox(action='show', mode='stop')  # Only records the command; `event` is not used and no reply is sent here.
13 | 


--------------------------------------------------------------------------------
/.env.light:
--------------------------------------------------------------------------------
 1 | COMPOSE_PATH_SEPARATOR=:
 2 | COMPOSE_FILE=docker-compose.light.yml
 3 | COMPOSE_PROJECT_NAME=light
 4 | 
 5 | # Obtain the next two parameters (app_id and app_hash) at https://my.telegram.org
 6 | STC_TGBOT_application.default_bot.app_id=...
 7 | STC_TGBOT_application.default_bot.app_hash=...
 8 | 
 9 | # Register your bot with @BotFather in Telegram to obtain the next two values
10 | STC_TGBOT_application.default_bot.bot_name=...
11 | STC_TGBOT_application.default_bot.bot_token=...
12 | 


--------------------------------------------------------------------------------
/web/.gitignore:
--------------------------------------------------------------------------------
 1 | # Logs
 2 | logs
 3 | *.log
 4 | npm-debug.log*
 5 | yarn-debug.log*
 6 | yarn-error.log*
 7 | pnpm-debug.log*
 8 | lerna-debug.log*
 9 | 
10 | node_modules
11 | .DS_Store
12 | dist
13 | dist-ssr
14 | coverage
15 | *.local
16 | 
17 | /cypress/videos/
18 | /cypress/screenshots/
19 | 
20 | # Editor directories and files
21 | .vscode/*
22 | !.vscode/extensions.json
23 | .idea
24 | *.suo
25 | *.ntvs*
26 | *.njsproj
27 | *.sln
28 | *.sw?
29 | 


--------------------------------------------------------------------------------
/cybrex/requirements.txt:
--------------------------------------------------------------------------------
 1 | aiokit>=1.2.3
 2 | beautifulsoup4>=4.12.2
 3 | ctransformers>=0.2.17
 4 | FlagEmbedding>=1.1.2
 5 | InstructorEmbedding>=1.0.1
 6 | izihawa-configurator>=1.0.4
 7 | izihawa-utils>=1.1.3
 8 | keybert>=0.7.0
 9 | langchain>=0.0.222
10 | lazy>=1.5
11 | lxml>=4.9.3
12 | openai>=0.27.8
13 | orjson
14 | pypdf>=3.12.0
15 | pyyaml>=6.0
16 | qdrant_client>=1.5.4
17 | tiktoken>=0.5.1
18 | safetensors==0.3.1
19 | stc-geck>=1.8.35
20 | unstructured[html]>=0.10.28
21 | 


--------------------------------------------------------------------------------
/tgbot/handlers/noop.py:
--------------------------------------------------------------------------------
 1 | from telethon import events
 2 | 
 3 | from library.telegram.base import RequestContext
 4 | 
 5 | from .base import BaseCallbackQueryHandler
 6 | 
 7 | 
 8 | class NoopHandler(BaseCallbackQueryHandler):  # Handles callback queries whose data matches `^/noop$`.
 9 |     filter = events.CallbackQuery(pattern='^/noop$')  # NOTE(review): handler annotates its event as ChatAction -- confirm.
10 | 
11 |     async def handler(self, event: events.ChatAction, request_context: RequestContext):
12 |         request_context.statbox(action='start', mode='noop')
13 |         await event.answer()  # Answers the callback query with no arguments.
14 | 


--------------------------------------------------------------------------------
/web/env.d.ts:
--------------------------------------------------------------------------------
 1 | import 'vite/client'
 2 | 
 3 | import { type SearchService } from '@/services/summa'
 4 | 
 5 | declare module '@vue/runtime-core' { // Module augmentation: adds a typed property to component instances.
 6 |   interface ComponentCustomProperties {
 7 |     search_service: SearchService // Uses the SearchService type imported at the top of this file.
 8 |   }
 9 | }
10 | 
11 | declare global { // Global augmentation; allowed here because the imports above make this file a module.
12 |   namespace NodeJS {
13 |     interface ProcessEnv { // Types for the variables read through `process.env`.
14 |       GITHUB_AUTH_TOKEN: string
15 |       NODE_ENV: 'development' | 'production'
16 |       PORT?: string // The only optional entry.
17 |       PWD: string
18 |     }
19 |   }
20 | }
21 | 


--------------------------------------------------------------------------------
/web/src/views/HowToSearchView.vue:
--------------------------------------------------------------------------------
 1 | <template lang="pug">
 2 | .container
 3 |   h2 {{ get_label('how_to_search') }}
 4 |   //- The help text is inserted as raw HTML, so the `help_content` label must remain trusted markup.
 5 |   span(v-html="get_label('help_content')")
 6 | </template>
 7 | <script lang="ts">
 8 | import { defineComponent } from 'vue'
 9 | 
10 | import { get_label } from '@/translations'
11 | 
12 | export default defineComponent({
13 |   name: 'HowToSearchView',
14 |   created () {
15 |     // Tab title: "<page label> - <help label> - STC".
16 |     const page_label = get_label('how_to_search')
17 |     const help_label = get_label('help')
18 |     document.title = `${page_label} - ${help_label} - STC`
19 |   }
20 | })
21 | </script>
22 | 


--------------------------------------------------------------------------------
/web/src/views/InstallIpfsView.vue:
--------------------------------------------------------------------------------
 1 | <template lang="pug">
 2 | .container
 3 |   h3 {{ get_label('install_ipfs') }}
 4 |   //- The content label is inserted as raw HTML, so it must remain trusted markup.
 5 |   span(v-html="get_label('install_ipfs_content')")
 6 | </template>
 7 | <script lang="ts">
 8 | import { defineComponent } from 'vue'
 9 | import { get_label } from "@/translations";
10 | 
11 | export default defineComponent({
12 |   name: 'InstallIpfsView',
13 |   created () {
14 |     // The middle segment goes through get_label('help') rather than a literal 'Help',
15 |     // so the tab title is localized the same way as in HowToSearchView.
16 |     document.title = `${get_label('install_ipfs')} - ${get_label('help')} - STC`
17 |   }
18 | })
19 | </script>
20 | 


--------------------------------------------------------------------------------
/tgbot/translations/__init__.py:
--------------------------------------------------------------------------------
 1 | from izihawa_configurator import Configurator
 2 | 
 3 | 
 4 | def get_translations():
 5 |     return Configurator([
 6 |         'tgbot/translations/translations.yaml',
 7 |     ])
 8 | 
 9 | 
10 | def t(label, language='en'):
11 |     if language in _translations and label in _translations[language]:
12 |         return _translations[language][label]
13 |     return _translations['en'][label]
14 | 
15 | 
16 | _translations = get_translations()
17 | 
18 | 
19 | __all__ = ['t']
20 | 


--------------------------------------------------------------------------------
/web/src/components/TagsList.vue:
--------------------------------------------------------------------------------
 1 | <template lang="pug">
 2 | div
 3 |   //- Keyed by position: nothing here guarantees that the tags are unique.
 4 |   span(v-for="(tag, index) in tags" :key="index")
 5 |     span.ms-1.me-1(v-if="index > 0") -
 6 |     router-link.text-decoration-none(:to="tag_link(tag)") {{ tag }}
 7 | </template>
 8 | 
 9 | <script lang="ts">
10 | import { defineComponent, type PropType } from 'vue'
11 | 
12 | export default defineComponent({
13 |   name: 'TagsList',
14 |   props: {
15 |     // Tags to render, separated by dashes; each one links to a search for that tag.
16 |     tags: {
17 |       type: Array as PropType<string[]>
18 |     }
19 |   },
20 |   methods: {
21 |     /**
22 |      * Route location for the search `tags:"<tag>"` on the root path.
23 |      * The query is passed as an object so that the router encodes it; tags containing
24 |      * characters such as `&`, `#` or `%` therefore no longer corrupt the URL.
25 |      */
26 |     tag_link (tag: string) {
27 |       return { path: '/', query: { q: `tags:"${tag}"` } }
28 |     }
29 |   }
30 | })
31 | </script>
32 | 


--------------------------------------------------------------------------------
/tgbot/requirements.txt:
--------------------------------------------------------------------------------
 1 | beautifulsoup4
 2 | lxml
 3 | pandas
 4 | fasttext-wheel
 5 | PyCryptodome
 6 | pypdf>=3.17.0
 7 | seaborn
 8 | 
 9 | aiobaseclient
10 | aiocrossref
11 | aiokit>=1.2.3
12 | aiosqlite
13 | aiosumma>=2.47.3
14 | bleach
15 | base36
16 | cybrex[petals]>=1.11.11
17 | dateparser
18 | emoji
19 | isbnlib>=3.10.13
20 | izihawa_configurator>=1.0.4
21 | izihawa_ipfs_api>=1.0.0
22 | izihawa_loglib>=1.0.2
23 | izihawa_utils
24 | lru-dict
25 | markdownify
26 | sqlalchemy
27 | stc-geck>=1.8.38
28 | 
29 | telethon==1.30.3
30 | 


--------------------------------------------------------------------------------
/tgbot/app/aiosqlite_wrapper.py:
--------------------------------------------------------------------------------
 1 | import aiosqlite
 2 | from aiokit import AioThing
 3 | 
 4 | 
 5 | def dict_factory(cursor, row):
 6 |     d = {}
 7 |     for idx, col in enumerate(cursor.description):
 8 |         d[col[0]] = row[idx]
 9 |     return d
10 | 
11 | 
class AioSqlite(AioThing):
    """`AioThing` wrapper around a single aiosqlite connection.

    ``self.db`` holds the not-yet-awaited ``aiosqlite.connect`` result until
    `start` runs; afterwards it holds the opened connection, configured to
    return rows as dicts.
    """

    def __init__(self, db_name):
        super().__init__()
        # Not awaited here: the connection is actually opened in `start`.
        self.db = aiosqlite.connect(db_name)

    async def start(self):
        """Open the connection and make it yield dict rows."""
        connection = await self.db
        connection.row_factory = dict_factory
        self.db = connection
20 | 


--------------------------------------------------------------------------------
/docker-compose.light.yml:
--------------------------------------------------------------------------------
 1 | services:
 2 |   ipfs:
 3 |     image: ipfs/kubo:latest
 4 |     environment:
 5 |       IPFS_PROFILE: server
 6 |     ports:
 7 |       - 8080:8080
 8 |     volumes:
 9 |       - /Users/pasha/data-ipfs:/data/ipfs
10 |   tgbot:
11 |     build:
12 |       context: .
13 |       dockerfile: tgbot/Dockerfile
14 |     depends_on:
15 |       ipfs:
16 |         condition: service_healthy
17 |     env_file:
18 |       - .env.light
19 |     restart: always
20 |     volumes:
21 |       - /Users/pasha/tmp:/usr/lib/stc-tgbot
22 | 


--------------------------------------------------------------------------------
/web/public/site.webmanifest:
--------------------------------------------------------------------------------
 1 | {
 2 |     "name": "",
 3 |     "short_name": "",
 4 |     "icons": [
 5 |         {
 6 |             "src": "./android-chrome-192x192.png",
 7 |             "sizes": "192x192",
 8 |             "type": "image/png"
 9 |         },
10 |         {
11 |             "src": "./android-chrome-512x512.png",
12 |             "sizes": "512x512",
13 |             "type": "image/png"
14 |         }
15 |     ],
16 |     "theme_color": "#ffffff",
17 |     "background_color": "#ffffff",
18 |     "display": "standalone"
19 | }
20 | 


--------------------------------------------------------------------------------
/web/vite-sw.config.ts:
--------------------------------------------------------------------------------
 1 | import { defineConfig } from 'vite'
 2 | 
 3 | // https://vitejs.dev/config/
 4 | export default defineConfig({
 5 |   base: '',
 6 |   build: {
 7 |     emptyOutDir: false,
 8 |     rollupOptions: {
 9 |       input: {
10 |         'service-worker': './node_modules/summa-wasm/dist/service-worker.js',
11 |       },
12 |       output: [
13 |         {
14 |           entryFileNames: () => {
15 |             return '[name].js'
16 |           }
17 |         }
18 |       ]
19 |     },
20 |     target: 'esnext'
21 |   }
22 | })
23 | 


--------------------------------------------------------------------------------
/web/src/views/DoomsdayView.vue:
--------------------------------------------------------------------------------
 1 | <template lang="pug">
 2 | .container
 3 |   h3 Doomsday Guide
 4 |   p In the event of an extinction-level occurrence (such as a nuclear war, global pandemic, or impact event), this space will provide links to a rescue kit and literature on the subject of civilization restoration.
 5 | </template>
 6 | <script lang="ts">
 7 | import { defineComponent } from 'vue'
// Static help page; the only logic is setting the browser tab title.
export default defineComponent({
  name: 'DoomsdayView',
  created () {
    // Set the page title as soon as the component instance is created.
    document.title = 'Doomsday Guide - Help - STC'
  }
})
14 | </script>
15 | 


--------------------------------------------------------------------------------
/tgbot/Dockerfile:
--------------------------------------------------------------------------------
 1 | ARG MODE
 2 | 
 3 | FROM python:3.11-slim as builder-common
 4 | RUN apt-get update \
 5 | && apt-get install gcc g++ git golang -y \
 6 | && apt-get clean
 7 | WORKDIR /app
 8 | ADD tgbot/requirements.txt tgbot/requirements.txt
 9 | RUN python3 -m venv venv
10 | RUN venv/bin/pip3 install -r tgbot/requirements.txt
11 | COPY fabrica fabrica
12 | COPY library library
13 | COPY tgbot tgbot
14 | COPY infra/hub/aioclient infra/hub/aioclient
15 | COPY infra/hub/proto infra/hub/proto
16 | ENV PYTHONPATH=/app
17 | RUN mkdir /usr/lib/stc-tgbot
18 | RUN mkdir /var/log/stc-tgbot
19 | CMD ["/app/venv/bin/python3", "tgbot/main.py"]


--------------------------------------------------------------------------------
/web/publi.sh:
--------------------------------------------------------------------------------
 1 | npm run build-only
 2 | 
 3 | API_ADDR=($(jq -r '.ipfs_api_multiaddr' summa-config.json))
 4 | echo Adding dist...
 5 | DIST_CID=$(ipfs --api $API_ADDR add --pin -Q -r --hash=blake3 dist)
 6 | echo Settings MFS...
 7 | ipfs --api $API_ADDR files rm -r /stc-web
 8 | ipfs --api $API_ADDR files cp /ipfs/"$DIST_CID" /stc-web
 9 | INDEX_CID=$(jq -r -c '.index' summa-config.json)
10 | ipfs --api $API_ADDR files cp -p /ipfs/$INDEX_CID /stc-web/data
11 | ipfs --api $API_ADDR files cp -p /ipfs/bafybeiaysi4s6lnjev27ln5icwm6tueaw2vdykrtjkwiphwekaywqhcjze/I /stc-web/images/wiki
12 | ipfs --api $API_ADDR files stat --hash /stc-web
13 | 


--------------------------------------------------------------------------------
/cybrex/cybrex/data_source/base.py:
--------------------------------------------------------------------------------
 1 | from dataclasses import dataclass
 2 | from typing import (
 3 |     List,
 4 |     Optional,
 5 | )
 6 | 
 7 | 
@dataclass
class SourceDocument:
    """A document paired with its identifier, as returned by data sources."""

    # Document payload; its keys are not defined in this module.
    document: dict
    # Identifier of the document.
    document_id: str
12 | 
13 | 
class BaseDataSource:
    """Interface for data sources; both methods must be overridden."""

    async def stream_documents(self, query: str, limit: int = 0) -> List[SourceDocument]:
        """Return the documents matching ``query``.

        The base implementation always raises ``NotImplementedError``.
        """
        raise NotImplementedError

    async def search_documents(
        self,
        query: str,
        limit: int = 5,
        sources: Optional[List[str]] = None,
    ) -> List[SourceDocument]:
        """Return up to ``limit`` documents for ``query``.

        ``sources`` is an optional list of source names.
        The base implementation always raises ``NotImplementedError``.
        """
        raise NotImplementedError
24 | 


--------------------------------------------------------------------------------
/web/tsconfig.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "extends": "@vue/tsconfig/tsconfig.dom.json",
 3 |   "include": [
 4 |     "env.d.ts",
 5 |     "src/**/*.ts",
 6 |     "src/**/*.vue",
 7 |     "summa-config.ts",
 8 |     "vite.config.ts",
 9 |     "vite-sw.config.ts"
10 |   ],
11 |   "compilerOptions": {
12 |     "baseUrl": ".",
13 |     "esModuleInterop": true,
14 |     "moduleResolution": "Node",
15 |     "paths": {
16 |       "@/*": ["./src/*"]
17 |     },
18 |     "resolveJsonModule": true,
19 |     "strict": false,
20 |     "types": ["node"]
21 |   },
22 | 
23 |   "references": [
24 |     {
25 |       "path": "./tsconfig.config.json"
26 |     }
27 |   ]
28 | }
29 | 


--------------------------------------------------------------------------------
/web/README.md:
--------------------------------------------------------------------------------
 1 | # Web STC
 2 | 
 3 | Search engine in your browser that can retrieve all data through IPFS. Uncensorable, unblockable, yours.
 4 | Original instance of STC lives at http://libstc.cc
 5 | 
 6 | Here you can find its source code and contribute if you are a skilled developer.
 7 | 
 8 | ## Development
 9 | 
10 | It requires [IPFS to be installed](https://docs.ipfs.tech/install/ipfs-desktop/) and launched.
11 | 
12 | ```bash
13 | npm i 
14 | npm run dev
15 | ```
16 | 
17 | ## Publishing
18 | 
19 | Publishing here means:
20 | - Building the static site
21 | - Creating an IPFS directory with the static site and links to data batteries
22 | 
23 | It can be done with `bash publi.sh`
24 | 


--------------------------------------------------------------------------------
/library/sciparse/language_detect.py:
--------------------------------------------------------------------------------
import os.path
from typing import (
    Dict,
    Optional,
)

import fasttext
 5 | 
 6 | if os.path.exists('./library/sciparse/models/lid.176.bin'):
 7 |     path_to_pretrained_model = './library/sciparse/models/lid.176.bin'
 8 |     fmodel = fasttext.load_model(path_to_pretrained_model)
 9 | else:
10 |     path_to_pretrained_model = './library/sciparse/models/lid.176.ftz'
11 |     fmodel = fasttext.load_model(path_to_pretrained_model)
12 | 
13 | 
def detect_language(text: str, threshold: float = 0.85) -> Optional[str]:
    """Return the language code predicted for ``text``.

    Newlines are replaced with spaces before prediction, and the text is
    passed to the model as a one-item batch.

    Args:
        text: Text to classify.
        threshold: Minimum probability a label needs to be returned by the
            model.

    Returns:
        The top label with its ``__label__`` prefix removed (for example
        ``'en'``), or ``None`` when no label reaches ``threshold``.
    """
    label_prefix = '__label__'
    prediction = fmodel.predict([text.replace('\n', ' ')], threshold=threshold)
    # prediction[0] holds the labels per input text; there is exactly one text.
    labels = prediction[0][0]
    if not labels:
        return None
    top_label = labels[0]
    # Strip the prefix instead of keeping the last two characters: some codes
    # are three letters long and would otherwise be cut into a different,
    # possibly valid, two-letter code.
    if top_label.startswith(label_prefix):
        return top_label[len(label_prefix):]
    return top_label
18 | 


--------------------------------------------------------------------------------
/web/src/components/LoadingSpinner.vue:
--------------------------------------------------------------------------------
 1 | <template lang="pug">
 2 | div(v-if="is_launched")
 3 |   .d-flex.justify-content-center
 4 |     .spinner-border(role="status")
 5 |   .d-flex.justify-content-center.m-5(v-if="label")
 6 |     div {{ label }}
 7 | </template>
 8 | 
 9 | <script lang="ts">
10 | import { defineComponent } from 'vue'
11 | 
// Spinner that only becomes visible after a short delay, with an optional
// label rendered underneath it.
export default defineComponent({
  name: 'LoadingSpinner',
  props: {
    // Text shown below the spinner; nothing is rendered for an empty label.
    label: {
      default: '',
      type: String
    }
  },
  data () {
    return {
      // Flipped to true once the delay has elapsed; gates the whole template.
      is_launched: false
    }
  },
  created () {
    // One-shot delay: a repeating interval would keep firing (and keep the
    // component referenced) for as long as the page lives.
    setTimeout(() => {
      this.is_launched = true
    }, 300)
  }
})
31 | </script>
32 | 


--------------------------------------------------------------------------------
/geck/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [build-system]
 2 | requires = ["setuptools<65.0"]
 3 | build-backend = "setuptools.build_meta"
 4 | 
 5 | [project]
 6 | name = "stc-geck"
 7 | version = "1.8.38"
 8 | authors = [{ name = "Interdimensional Walker" }]
 9 | description = "GECK (Garden Of Eden Creation Kit) is a toolkit for setting up and maintaining STC"
10 | readme = "README.md"
11 | requires-python = ">=3.8"
12 | classifiers = [
13 |     "Programming Language :: Python :: 3.8",
14 | ]
15 | dynamic = ["dependencies"]
16 | 
17 | [project.scripts]
18 | geck = "stc_geck.cli:main"
19 | 
20 | [project.urls]
21 | "Homepage" = "https://github.com/nexus-stc/stc"
22 | 
23 | [tool.setuptools.dynamic]
24 | dependencies = {file = ["requirements.txt"]}
25 | 
26 | 


--------------------------------------------------------------------------------
/cybrex/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [build-system]
 2 | requires = ["setuptools<65.0"]
 3 | build-backend = "setuptools.build_meta"
 4 | 
 5 | [project]
 6 | name = "cybrex"
 7 | version = "1.12.10"
 8 | authors = [{ name = "Interdimensional Walker" }]
 9 | description = "Researching AI"
10 | readme = "README.md"
11 | requires-python = ">=3.8"
12 | classifiers = [
13 |     "Programming Language :: Python :: 3.8",
14 | ]
15 | dynamic = ["dependencies"]
16 | 
17 | [project.scripts]
18 | cybrex = "cybrex.cli:main"
19 | 
20 | [project.urls]
21 | "Homepage" = "https://github.com/nexus-stc/stc"
22 | 
23 | [project.optional-dependencies]
24 | petals = ["petals>=2.0.0"]
25 | 
26 | [tool.setuptools.dynamic]
27 | dependencies = {file = ["requirements.txt"]}
28 | 
29 | 


--------------------------------------------------------------------------------
/cybrex/cybrex/utils.py:
--------------------------------------------------------------------------------
 1 | import asyncio
 2 | 
 3 | 
 4 | class MultipleAsyncExecution:
 5 |     def __init__(self, par):
 6 |         self.par = par
 7 |         self.s = asyncio.Semaphore(par)
 8 | 
 9 |     async def execute(self, coro):
10 |         if not self.s:
11 |             raise RuntimeError('`ParallelAsyncExecution` has been already joined')
12 |         await self.s.acquire()
13 |         task = asyncio.create_task(coro)
14 |         task.add_done_callback(lambda f: self.s.release())
15 |         return task
16 | 
17 |     async def join(self):
18 |         for i in range(self.par):
19 |             await self.s.acquire()
20 |         s = self.s
21 |         self.s = None
22 |         for i in range(self.par):
23 |             s.release()
24 | 


--------------------------------------------------------------------------------
/tgbot/handlers/aboutus.py:
--------------------------------------------------------------------------------
 1 | from telethon import (
 2 |     Button,
 3 |     events,
 4 | )
 5 | 
 6 | from library.telegram.base import RequestContext
 7 | from tgbot.translations import t
 8 | 
 9 | from .base import BaseHandler
10 | 
11 | 
class AboutusHandler(BaseHandler):
    """Answers the ``/aboutus`` command (optionally suffixed with ``@bot``)."""

    filter = events.NewMessage(incoming=True, pattern='^/aboutus(@[A-Za-z0-9_]+)?$')
    is_group_handler = True

    async def handler(self, event: events.ChatAction, request_context: RequestContext):
        """Log the request and reply with the localized ``ABOUT_US`` text."""
        request_context.statbox(action='show', mode='aboutus')
        about_text = t('ABOUT_US', request_context.chat['language'])
        # Button.clear() is passed as the reply markup; link previews are off.
        await event.reply(about_text, buttons=Button.clear(), link_preview=False)
23 | 


--------------------------------------------------------------------------------
/tgbot/handlers/librarian.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | 
 3 | from telethon import events
 4 | 
 5 | from library.telegram.base import RequestContext
 6 | 
 7 | from .base import BaseHandler
 8 | 
 9 | 
class LibrarianTextHandler(BaseHandler):
    """Deletes every incoming message that was not sent by a configured moderator."""

    # Matches any text, including multi-line messages.
    filter = events.NewMessage(incoming=True, pattern=re.compile(r'(.*)', flags=re.DOTALL))
    is_group_handler = True

    async def handler(self, event: events.ChatAction, request_context: RequestContext):
        """Tag the request context, then delete the message unless its sender is a moderator."""
        request_context.add_default_fields(
            mode='librarian_text',
            session_id=self.generate_session_id(),
        )
        sender = event.sender_id
        moderators = self.application.config['librarian']['moderators']
        if sender in moderators:
            return
        await event.delete()
21 | 


--------------------------------------------------------------------------------
/tgbot/main.py:
--------------------------------------------------------------------------------
 1 | import asyncio
 2 | import logging
 3 | from concurrent.futures import ThreadPoolExecutor
 4 | 
 5 | from app.application import TelegramApplication
 6 | from configs import get_config
 7 | from izihawa_loglib import configure_logging
 8 | 
 9 | 
def main(config):
    """Run the Telegram application until it finishes, then log the exit.

    A fresh event loop with a 64-thread default executor is created and
    installed as the current loop, and the application's ``start_and_wait``
    coroutine is run to completion on it.

    Args:
        config: Application configuration; also passed to the logging setup.
    """
    configure_logging(config)
    loop = asyncio.new_event_loop()
    loop.set_default_executor(ThreadPoolExecutor(64))
    asyncio.set_event_loop(loop)
    try:
        loop.run_until_complete(TelegramApplication(config=config).start_and_wait())
    finally:
        # No loop is running once run_until_complete has returned, so
        # asyncio.get_running_loop() would raise RuntimeError here and the
        # exit record below would never be written. Close our own loop instead.
        loop.close()
    logging.getLogger('statbox').info({
        'mode': 'application',
        'action': 'exit',
    })
21 | 
22 | 
# Script entry point: load the configuration and run the bot.
if __name__ == '__main__':
    main(config=get_config())
25 | 


--------------------------------------------------------------------------------
/web/src/components/QrCode.vue:
--------------------------------------------------------------------------------
 1 | <template lang="pug">
 2 | div.favicon-inversion-filter.text-center(id="qr-code")
 3 | </template>
 4 | 
 5 | <script lang="ts">
 6 | import QrCreator from 'qr-creator'
 7 | import { defineComponent } from 'vue'
 8 | 
 9 | export default defineComponent({
10 |   name: 'QrCode',
11 |   props: {
12 |     url: {
13 |       default: '',
14 |       type: String
15 |     }
16 |   },
17 |   async mounted () {
18 |     QrCreator.render(
19 |       {
20 |         text: this.url,
21 |         radius: 0.5, // 0.0 to 0.5
22 |         ecLevel: 'H', // L, M, Q, H
23 |         fill: '#000000', // foreground color
24 |         background: null, // color or null for transparent
25 |         size: 280 // in pixels
26 |       },
27 |       document.querySelector('#qr-code')!
28 |     )
29 |   }
30 | })
31 | </script>
32 | 


--------------------------------------------------------------------------------
/tgbot/handlers/help.py:
--------------------------------------------------------------------------------
 1 | from telethon import (
 2 |     Button,
 3 |     events,
 4 | )
 5 | 
 6 | from library.telegram.base import RequestContext
 7 | from tgbot.translations import t
 8 | 
 9 | from .base import BaseHandler
10 | 
11 | 
class HelpHandler(BaseHandler):
    """Answers the ``/help`` command in private chats and, when addressed, in groups."""

    filter = events.NewMessage(incoming=True, pattern='^/help(@[A-Za-z0-9_]+)?$')
    is_group_handler = True

    async def handler(self, event: events.ChatAction, request_context: RequestContext):
        """Reply with the help text that fits the kind of chat.

        In groups and channels the bot only answers when the command carries
        this bot's own ``@name`` suffix.
        """
        request_context.statbox(action='show', mode='help')

        in_shared_chat = event.is_group or event.is_channel
        if not in_shared_chat:
            await event.reply(t('HELP', request_context.chat['language']), buttons=Button.clear())
            return

        addressed_to = event.pattern_match.group(1)
        if addressed_to == f'@{request_context.bot_name}':
            await event.reply(t('HELP_FOR_GROUPS', request_context.chat['language']), buttons=Button.clear())
24 | 


--------------------------------------------------------------------------------
/tgbot/handlers/howtohelp.py:
--------------------------------------------------------------------------------
 1 | from telethon import events
 2 | 
 3 | from library.telegram.base import RequestContext
 4 | from tgbot.configs import config
 5 | from tgbot.translations import t
 6 | 
 7 | from .base import BaseHandler
 8 | 
 9 | 
class HowToHelpHandler(BaseHandler):
    """Answers the ``/howtohelp`` command with the localized contribution text."""

    filter = events.NewMessage(incoming=True, pattern='^/howtohelp(@[A-Za-z0-9_]+)?$')
    is_group_handler = True

    async def handler(self, event: events.ChatAction, request_context: RequestContext):
        """Fill the ``HOW_TO_HELP`` template with configured links and reply with it.

        Links missing from the configuration are rendered as '🚫'.
        """
        request_context.statbox(action='show', mode='howtohelp')
        template = t('HOW_TO_HELP', request_context.chat['language'])
        message = template.format(
            reddit_url=config['reddit'].get('url', '🚫'),
            related_channel=config['telegram'].get('related_channel', '🚫'),
            twitter_contact_url=config['twitter'].get('contact_url', '🚫'),
        )
        await event.reply(message)
22 | 


--------------------------------------------------------------------------------
/library/sciparse/cli.py:
--------------------------------------------------------------------------------
 1 | import fire
 2 | from aiobaseclient import BaseClient
 3 | from aiokit.utils import sync_fu
 4 | from izihawa_ipfs_api import IpfsHttpClient
 5 | 
 6 | from library.sciparse.sciparser import (
 7 |     ClientPool,
 8 |     SciParser,
 9 | )
10 | 
11 | 
async def process(grobid_base_url, ipfs_base_url, doi):
    """Parse the paper identified by ``doi`` and print the result.

    Starts an IPFS client and a GROBID client for the given base URLs, wires
    them into a `SciParser` (GROBID pool built with ``par=16``) and prints
    whatever ``parse_paper`` returns.
    """
    ipfs = IpfsHttpClient(base_url=ipfs_base_url)
    await ipfs.start()
    grobid = BaseClient(base_url=grobid_base_url)
    await grobid.start()

    grobid_pool = ClientPool.from_client(grobid, par=16)
    parser = SciParser(ipfs_http_client=ipfs, grobid_pool=grobid_pool)
    await parser.start()
    print(await parser.parse_paper(doi))
25 | 
26 | 
def main():
    """Expose `process` as a command-line interface through ``fire``."""
    # `sync_fu` wraps the coroutine function before it is handed to fire.
    cli_entry = sync_fu(process)
    fire.Fire(cli_entry)
29 | 
30 | 
# Run the command-line interface when the module is executed as a script.
if __name__ == '__main__':
    main()
33 | 


--------------------------------------------------------------------------------
/tgbot/handlers/shortlink.py:
--------------------------------------------------------------------------------
 1 | from telethon import events
 2 | 
 3 | from library.telegram.base import RequestContext
 4 | from tgbot.translations import t
 5 | from tgbot.views.telegram.common import (
 6 |     TooLongQueryError,
 7 |     encode_query_to_deep_link,
 8 | )
 9 | 
10 | from .base import BaseHandler
11 | 
12 | 
class ShortlinkHandler(BaseHandler):
    """Answers ``/shortlink <query>`` with a deep link that encodes the query."""

    filter = events.NewMessage(incoming=True, pattern='^/shortlink\\s?(.*)?')

    async def handler(self, event: events.ChatAction, request_context: RequestContext):
        """Reply with the deep link for the query, or with an error text if it is too long.

        Either way the reply body is wrapped in backticks.
        """
        query = event.pattern_match.group(1)
        request_context.statbox(action='start', mode='shortlink', query=query)

        try:
            reply_body = encode_query_to_deep_link(query, request_context.bot_name)
        except TooLongQueryError:
            reply_body = t('TOO_LONG_QUERY_FOR_SHORTLINK', request_context.chat['language'])

        sent = await event.reply(f'`{reply_body}`', link_preview=False)
        return sent
26 | 


--------------------------------------------------------------------------------
/tgbot/app/exceptions.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | 
 3 | from izihawa_utils.exceptions import BaseError
 4 | 
 5 | 
class BannedUserError(BaseError):
    """Warning-level error that carries a ban timeout (code ``banned_user_error``)."""

    level = logging.WARNING
    code = 'banned_user_error'

    def __init__(self, ban_timeout: int):
        # NOTE(review): the unit of the timeout is not visible here -- confirm with callers.
        self.ban_timeout = ban_timeout
12 | 
13 | 
class UnknownFileFormatError(BaseError):
    """Warning-level error with code ``unknown_file_format_error``."""

    level = logging.WARNING
    code = 'unknown_file_format_error'
17 | 
18 | 
class UnknownIndexAliasError(BaseError):
    """Error with code ``unknown_index_alias_error``; keeps the base class log level."""

    code = 'unknown_index_alias_error'
21 | 
22 | 
class WidgetError(BaseError):
    """Warning-level error that carries a text and buttons (code ``widget_error``)."""

    level = logging.WARNING
    code = 'widget_error'

    def __init__(self, text, buttons):
        # Presumably what should be shown to the user in place of the widget -- verify against callers.
        self.text = text
        self.buttons = buttons
30 | 
31 | 
class DownloadError(BaseError):
    """Warning-level error with code ``download_error``."""

    level = logging.WARNING
    code = 'download_error'
35 | 
36 | 
class InvalidSearchError(BaseError):
    """Error that keeps the offending ``search`` value; level and code come from the base class."""

    def __init__(self, search):
        self.search = search
40 | 


--------------------------------------------------------------------------------
/web/src/components/ReferencesList.vue:
--------------------------------------------------------------------------------
 1 | <template lang="pug">
 2 | div
 3 |   loading-spinner(v-if="references === null")
 4 |   div(v-else)
 5 |     div(v-for="(reference, i) of references")
 6 |       document-snippet.small(
 7 |         :document="JSON.parse(reference.document)",
 8 |         v-bind:key="reference.position",
 9 |         :with_abstract="false",
10 |         :with_cover="false",
11 |         :with_tags="false",
12 |       )
13 |       hr(v-if="i !== references.length - 1")
14 | </template>
15 | 
16 | <script lang="ts">
17 | import { defineComponent, type PropType } from 'vue'
18 | 
19 | import LoadingSpinner from './LoadingSpinner.vue'
20 | import DocumentSnippet from "@/components/DocumentSnippet.vue";
21 | 
// List of referenced documents, each rendered as a compact document snippet.
export default defineComponent({
  name: 'ReferencesList',
  components: {DocumentSnippet, LoadingSpinner },
  props: {
    // `null` makes the template show the loading spinner instead of the list.
    // Each entry's `document` is passed through JSON.parse by the template.
    references: {
      type: null as PropType<Array<{ document: any, position: number }>> | null
    }
  }
})
31 | </script>
32 | 


--------------------------------------------------------------------------------
/web/src/components/SearchList.vue:
--------------------------------------------------------------------------------
 1 | <template lang="pug">
 2 | div(v-for="scored_document in scored_documents" v-bind:key="scored_document.position")
 3 |   .card.mb-3
 4 |     .card-body
 5 |       document-snippet(
 6 |         :document="JSON.parse(scored_document.document)"
 7 |         :snippets="scored_document.snippets",
 8 |         :with_abstract="true"
 9 |         :with_cover="true"
10 |         :with_large_caption="true"
11 |       )
12 | </template>
13 | 
14 | <script lang="ts">
15 | import { defineComponent } from 'vue'
16 | 
17 | import DocumentSnippet from '@/components/DocumentSnippet.vue'
18 | 
// Search results rendered as one card with a document snippet per hit.
export default defineComponent({
  name: 'SearchList',
  components: { DocumentSnippet },
  props: {
    // Hits to render; the template reads `position`, `document` (passed
    // through JSON.parse) and `snippets` from every entry.
    scored_documents: {
      type: Array,
      required: true
    }
  }
})
29 | </script>
30 | 
31 | <style scoped lang="scss">
32 | li {
33 |   padding-bottom: 15px;
34 |   padding-left: 0;
35 |   &:after {
36 |     content: none;
37 |   }
38 | }
39 | </style>
40 | 


--------------------------------------------------------------------------------
/web/index.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en">
 3 |   <head>
 4 |     <meta charset="UTF-8" />
 5 |     <link rel="apple-touch-icon" sizes="180x180" href="/apple-touch-icon.png">
 6 |     <link rel="icon" type="image/png" sizes="32x32" href="/favicon-32x32.png">
 7 |     <link rel="icon" type="image/png" sizes="16x16" href="/favicon-16x16.png">
 8 |     <link href="/favicon.svg" rel="icon"/>
 9 |     <link rel="manifest" href="/site.webmanifest">
10 |     <link rel="mask-icon" href="/safari-pinned-tab.svg" color="#5bbad5">
11 |     <meta name="msapplication-TileColor" content="#ffc40d">
12 |     <meta name="theme-color" content="#ffffff">
13 |     <meta name="viewport" content="width=device-width, initial-scale=1.0" />
14 |     <meta name="description" content="Search the entirety of humanity knowledge, including books and scholarly publications">
15 |     <title>STC</title>
16 |   </head>
17 |   <body>
18 |     <div id="app"></div>
19 |     <script type="module" src="./src/main.ts"></script>
20 |   </body>
21 | </html>
22 | 


--------------------------------------------------------------------------------
/web/src/main.ts:
--------------------------------------------------------------------------------
 1 | import './scss/styles.scss'
 2 | import 'bootstrap'
 3 | import 'bootstrap/js/dist/tab'
 4 | 
 5 | import { createApp } from 'vue'
 6 | 
 7 | import App from './App.vue'
 8 | import router from './router'
 9 | import { get_label } from './translations'
10 | import {SearchService} from "@/services/search/search-service";
11 | 
12 | // Set theme to the user's preferred color scheme
function updateTheme () {
  // Mirror the OS/browser colour-scheme preference into Bootstrap's
  // `data-bs-theme` attribute on the <html> element.
  const prefers_dark = window.matchMedia('(prefers-color-scheme: dark)').matches
  let color_mode = 'light'
  if (prefers_dark) {
    color_mode = 'dark'
  }
  const root = document.querySelector('html')
  root.setAttribute('data-bs-theme', color_mode)
}
19 | 
20 | // Set theme on load
21 | updateTheme()
22 | 
23 | // Update theme when the preferred scheme changes
24 | window
25 |   .matchMedia('(prefers-color-scheme: dark)')
26 |   .addEventListener('change', updateTheme)
27 | 
// Create the root application and install the router.
const app = createApp(App)
app.use(router)

// Make `get_label` callable from every component.
app.mixin({
  methods: {
    get_label
  }
})
// One shared search service instance, reachable from components as `search_service`.
app.config.globalProperties.search_service = new SearchService("info")
app.mount('#app')
38 | 


--------------------------------------------------------------------------------
/cybrex/cybrex/llm_manager.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | 
 3 | 
 4 | class LLMManager:
 5 |     def __init__(self, llm, prompter, config, max_prompt_chars, tokenizer=None):
 6 |         self.llm = llm
 7 |         self.prompter = prompter
 8 |         self.config = config
 9 |         self.max_prompt_chars = max_prompt_chars
10 |         self.tokenizer = tokenizer
11 | 
12 |     @property
13 |     def context_length(self):
14 |         return self.config['context_length']
15 | 
16 |     def process(self, prompt):
17 |         logging.getLogger('statbox').info({'action': 'process', 'mode': 'llm_manager', 'prompt': prompt})
18 |         if self.tokenizer:
19 |             input_ids = self.tokenizer(prompt, return_tensors="pt")["input_ids"]
20 |             outputs = self.llm.generate(
21 |                 input_ids,
22 |                 max_new_tokens=self.config.get('max_new_tokens'),
23 |                 temperature=self.config.get('temperature', 1.0),
24 |             )
25 |             return self.tokenizer.batch_decode(outputs[:, input_ids.shape[1]:])[0].replace('</s>', '')
26 |         else:
27 |             return self.llm(prompt)
28 | 


--------------------------------------------------------------------------------
/web/src/components/ConnectivityIssues.vue:
--------------------------------------------------------------------------------
 1 | <template lang="pug">
 2 | div.font-monospace
 3 |   p {{ text }}
 4 | </template>
 5 | 
 6 | <script lang="ts">
 7 | import { utils } from 'summa-wasm'
 8 | import { defineComponent } from 'vue'
 9 | 
10 | import { get_label } from '@/translations'
11 | 
// Turns a low-level failure reason into a message the user can act on.
export default defineComponent({
  name: 'ConnectivityIssues',
  props: {
    // The error to explain; may be absent.
    reason: {
      type: Error
    }
  },
  computed: {
    text () {
      const reason_text = this.reason?.toString()

      // WebAssembly compilation failed: the browser cannot run the engine.
      if (reason_text?.startsWith('CompileError')) {
        return get_label('unsupported_browser')
      }

      const is_network_failure =
        this.reason?.name === 'AxiosError' ||
        reason_text?.includes('\\"status\\":0') ||
        reason_text?.includes('EOF while parsing a value')
      if (!is_network_failure) {
        // Unrecognized failure: show the reason itself.
        return this.reason
      }

      // Against a local IPFS URL the likely cause is that IPFS is not running.
      return this.is_localhost
        ? get_label('is_ipfs_enabled')
        : get_label('network_error')
    },
    is_localhost () {
      return utils.get_ipfs_url().includes('localhost')
    }
  }
})
41 | </script>
42 | 


--------------------------------------------------------------------------------
/web/.eslintrc.js:
--------------------------------------------------------------------------------
 1 | module.exports = {
 2 |     "env": {
 3 |         "browser": true,
 4 |         "es2021": true
 5 |     },
 6 |     "extends": [
 7 |         "standard-with-typescript",
 8 |         "plugin:vue/vue3-recommended",
 9 |         "@vue/typescript/recommended",
10 |     ],
11 |     "overrides": [
12 |         {
13 |             "env": {
14 |                 "node": true
15 |             },
16 |             "files": [
17 |                 ".eslintrc.{js,cjs}"
18 |             ],
19 |             "parserOptions": {
20 |                 "sourceType": "script"
21 |             }
22 |         }
23 |     ],
24 |     "parser": "vue-eslint-parser",
25 |     "parserOptions": {
26 |         "ecmaVersion": "latest",
27 |         "project": "./tsconfig.json",
28 |         "sourceType": "module"
29 |     },
30 |     "plugins": [
31 |         "@typescript-eslint",
32 |         "vue",
33 |         "simple-import-sort"
34 |     ],
35 |     "root": true,
36 |     "rules": {
37 |         "simple-import-sort/imports": "error",
38 |         "simple-import-sort/exports": "error",
39 |         "@typescript-eslint/naming-convention": [
40 |             "error",
41 |             {
42 |                 "selector": "variableLike", "format": ["snake_case", "camelCase"]
43 |             }
44 |         ]
45 |     }
46 | }
47 | 


--------------------------------------------------------------------------------
/tgbot/README.md:
--------------------------------------------------------------------------------
 1 | ### Launching bots
 2 | 
 3 | - The first startup will be slow!
 4 | - Make sure to mount volumes for persistence. Otherwise, after every restart, you will lose your caches and databases (including users and riot bots).
 5 | - Beforehand, you need to set up all credentials in the `.env.light` file. After setting them up, execute the following command in the Terminal:
 6 | 
 7 | ```bash
 8 | docker compose --env-file .env.light up --force-recreate --build
 9 | ```
10 | Wait for the following line to be displayed in the logs:
11 | ```bash
12 | light-tgbot-1  | INFO:statbox:{'action': 'started', 'mode': 'dynamic_bot', 'bot_name': '<bot_name>'}
13 | ```
14 | 
15 | Possible performance optimizations, from least to most complicated:
16 | 
17 | - Mount volumes to tgbot to cache bot credentials:
18 |     ```yaml
19 |     volumes:
20 |     - /usr/lib/stc-tgbot:/usr/lib/stc-tgbot
21 |     - /var/log/stc-tgbot:/var/log/stc-tgbot
22 |     ```
23 | - Mount volumes to ipfs to cache the database and downloaded items:
24 |     ```yaml
25 |     volumes:
26 |     - /data/ipfs:/data/ipfs
27 |     ```
28 | - If you have mounted volumes to ipfs, pin the database to IPFS:
29 |     ```bash
30 |     docker compose --env-file .env.light exec ipfs ipfs pin add /ipns/libstc.cc --progress
31 |     ```
32 | - Host the database directly (requires development experience).


--------------------------------------------------------------------------------
/web/src/views/IntroView.vue:
--------------------------------------------------------------------------------
 1 | <template lang="pug">
 2 | .container
 3 |   h2 Welcome, seeker!
 4 |   p 欢迎，探索者！- आपका स्वागत है, साधक! - ¡Bienvenido buscador! - Bem-vindo, buscador! - مرحبًا بك أيها الباحث! - Добро пожаловать, искатель!
 5 |   ul
 6 |     li
 7 |       a(href="#/nexus_science/cid:bafykbzaceayxkpz5tk6nuqjzoidix4y4lakfwvzkmfisycduijcwfew7waa5e") English through pictures
 8 |     li
 9 |       a(href="#/?q=english+for+beginners&p=1&d=nexus_science") Further sources for learning English
10 |   h4 {{ get_label('about') }}
11 |   span(v-html="get_label('about_intro')")
12 |   hr
13 |   img.img-fluid(id="origin", src="@/assets/origin.jpg")
14 | </template>
15 | <script lang="ts">
16 | import { defineComponent } from 'vue'
17 | 
18 | import { get_label } from '../translations'
// Welcome page of the help section: renders the translated "about" text and
// mirrors it into the document's meta description.
export default defineComponent({
  name: 'IntroView',
  // Expose the translation helper to the template.
  methods: { get_label },
  created () {
    document.title = 'Welcome - Help - STC'
    // Update the meta description only when the tag is present in <head>.
    document
      .querySelector('head meta[name="description"]')
      ?.setAttribute('content', get_label('about_intro'))
  }
})
30 | </script>
31 | 
32 | <style lang="scss" scoped>
33 | #origin {
34 |   border-width: 20px;
35 |   border-radius: 20px;
36 |   border-color: #111111;
37 |   border-style: solid;
38 | }
39 | </style>
40 | 


--------------------------------------------------------------------------------
/library/textutils/__init__.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | 
# Runs of characters that are neither whitespace nor word characters.
NON_ALNUMWHITESPACE_REGEX = re.compile(r'([^\s\w])+')
# Loose e-mail matcher: local part, domain and a 2-5 letter top-level domain.
EMAIL_REGEX = re.compile(r'([a-zA-Z0-9_\-\.]+)@([a-zA-Z0-9_\-\.]+)\.([a-zA-Z]{2,5})')
# One or more `#`/`@` markers followed by an ASCII word (group 1: markers, group 2: word).
HASHTAG_REGEX = re.compile(r'([#@]+)([A-Za-z0-9_]+)')
# Any run of whitespace.
MULTIWHITESPACE_REGEX = re.compile(r"\s+")
# A string made of exactly one character taken from the listed emoji/symbol ranges.
STICKER_REGEX = re.compile(
    '^[\U0001F1E0-\U0001F1FF'
    '\U0001F300-\U0001F5FF'
    '\U0001F600-\U0001F64F'
    '\U0001F680-\U0001F6FF'
    '\U0001F700-\U0001F77F'
    '\U0001F780-\U0001F7FF'
    '\U0001F800-\U0001F8FF'
    '\U0001F900-\U0001F9FF'
    '\U0001FA00-\U0001FA6F'
    '\U0001FA70-\U0001FAFF'
    '\U00002702-\U000027B0]$',
    flags=re.UNICODE,
)
# URL pattern. The scheme group is optional, so text starting at `://` also matches.
URL_REGEX_TEXT = r'(https?|ftp)?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)'
URL_REGEX = re.compile(URL_REGEX_TEXT)
23 | HIDDEN_CHAR = '‌'
# Captures the first path segment of a t.me link; `joinchat/` invite links are excluded.
TELEGRAM_LINK_REGEX = re.compile('(?:https?://)?t\\.me/(?!joinchat/)([A-Za-z0-9_]+)')

# DOI prefix followed by arbitrary text ending in a literal `.*`.
# NOTE(review): the `.` after `10` is unescaped in the DOI patterns below and
# therefore matches any character — confirm whether that is intended.
DOI_WILDCARD_REGEX_TEXT = r'(10.\d{4,9}).*\.\*'
# DOI with optional `doi.org/` lead-in; the separator may be `/` or url-encoded `%2F`.
# Group 1 is the prefix, group 2 the suffix (which may not end in `.`, `?` or whitespace).
DOI_REGEX_TEXT = r'(?:doi.org/)?(10.\d{4,9})\s?(?:/|%2[Ff])\s?([%-._;()<>/:A-Za-z0-9]+[^.?\s])'
DOI_REGEX = re.compile(DOI_REGEX_TEXT)
# Whole-string ISBN: optional "ISBN" label, optional 978/979 prefix, nine digits and a check character.
ISBN_REGEX = re.compile(r'^(?:[iI][sS][bB][nN]\:?\s*)?((97(8|9))?\-?\d{9}(\d|X))$')
# 32 hexadecimal characters.
MD5_REGEX = re.compile(r'([A-Fa-f0-9]{32})')
# Same shape as DOI_REGEX but anchored to the whole string and without the url-encoded separator.
ONLY_DOI_REGEX = re.compile(r'^(10.\d{4,9})\s?/\s?([-._;()<>/:A-Za-z0-9]+[^.?\s])$')
# PubMed identifier given either as an ncbi.nlm.nih.gov/pubmed/ URL or after a "PMID" label.
PUBMED_ID_REGEX = re.compile(r'(?:(?:https?://)?(?:www.)?ncbi.nlm.nih.gov/pubmed/|[Pp][Mm][Ii][Dd]\s?:?\s*)([0-9]+)')
# A single character in the range U+4E00..U+9FFF.
CJK_CHAR_REGEX_TEXT = r'[\u4e00-\u9fff]'
34 | 


--------------------------------------------------------------------------------
/library/telegram/promotioner.py:
--------------------------------------------------------------------------------
 1 | import random
 2 | 
 3 | 
 4 | class Promotioner:
 5 |     """
 6 |     Promotioner is used to select promotion randomly based on weights of every promotion.
 7 |     """
 8 |     def __init__(
 9 |         self,
10 |         promotions: list,
11 |         default_promotion_index: int = 0,
12 |         promotion_vars: dict = None,
13 |     ):
14 |         self.promotions = promotions
15 |         self.default_promotion_index = default_promotion_index
16 |         if not promotion_vars:
17 |             promotion_vars = {}
18 |         self.promotion_vars = promotion_vars
19 |         self.partial_sums: list = [self.promotions[0]['weight']]
20 |         for promotion in self.promotions[1:]:
21 |             self.partial_sums.append(promotion['weight'] + self.partial_sums[-1])
22 | 
23 |     def choose_promotion(self, language: str = 'en') -> str:
24 |         pivot = random.randrange(self.partial_sums[-1])
25 |         for partial_sum, promotion in zip(self.partial_sums, self.promotions):
26 |             if partial_sum <= pivot:
27 |                 continue
28 |             if language in promotion['texts']:
29 |                 return promotion['texts'][language].format(**self.promotion_vars)
30 |             elif promotion.get('local', False):
31 |                 default_promotion = self.promotions[self.default_promotion_index]
32 |                 if language in default_promotion['texts']:
33 |                     return default_promotion['texts'][language].format(**self.promotion_vars)
34 |                 return default_promotion['texts']['en'].format(**self.promotion_vars)
35 |             else:
36 |                 return promotion['texts']['en'].format(**self.promotion_vars)
37 | 


--------------------------------------------------------------------------------
/library/telegram/utils.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | import traceback
 3 | from contextlib import asynccontextmanager
 4 | from typing import Optional
 5 | 
 6 | from izihawa_loglib import error_log
 7 | from telethon import (
 8 |     errors,
 9 |     events,
10 | )
11 | 
12 | 
@asynccontextmanager
async def safe_execution(
    error_log=error_log,
    on_fail: Optional = None,
    level=logging.WARNING,
    is_logging_enabled: bool = True
):
    """
    Async context manager that runs the wrapped body and absorbs most errors.

    Only `events.StopPropagation` escapes the context manager; every other
    exception is either ignored or reported through `error_log`.

    Args:
        error_log: callable invoked as `error_log(exception, level=level)`.
        on_fail: optional coroutine function awaited after an unexpected
            exception (one not matched by the more specific handlers).
        level: logging level forwarded to `error_log`.
        is_logging_enabled: when false, nothing is logged or printed.
    """
    try:
        try:
            yield
        except events.StopPropagation:
            # Control-flow signal, must reach the caller untouched.
            raise
        except errors.MessageNotModifiedError:
            # Silently ignored, not even logged.
            pass
        except (
            errors.UserIsBlockedError,
            errors.QueryIdInvalidError,
            errors.MessageDeleteForbiddenError,
            errors.MessageIdInvalidError,
            errors.ChatAdminRequiredError,
        ) as e:
            # Listed Telegram errors: reported, but `on_fail` is not invoked.
            if is_logging_enabled:
                error_log(e, level=level)
                traceback.print_exc()
        except ValueError as e:
            if e.args and e.args[0].startswith('Request was unsuccessful'):
                if is_logging_enabled:
                    error_log(e, level=level)
            else:
                # Re-raised into the outer `try`, where it is logged and swallowed.
                raise
        except Exception as e:
            # Anything unexpected: report it and give the caller a chance to react.
            if is_logging_enabled:
                error_log(e, level=level)
                traceback.print_exc()
            if on_fail:
                await on_fail()
    except events.StopPropagation:
        raise
    except Exception as e:
        # Catches errors raised by the handlers above (re-raised ValueError,
        # failures inside `on_fail`, ...), so they never propagate further.
        if is_logging_enabled:
            error_log(e, level=level)
54 | 


--------------------------------------------------------------------------------
/web/src/views/StcHubApiView.vue:
--------------------------------------------------------------------------------
 1 | <template lang="pug">
 2 | .container
 3 |   h3 STC Hub API
 4 |   p The STC Hub API offers a straightforward way to integrate third-party applications with the STC's extensive corpus of scholarly publications.
 5 |   h5 Introduction
 6 |   p IPFS allows the creation of large, distributable directories across multiple peers. Each directory is assigned a unique CID that can be used as a reference.
 7 |   p We have utilized this feature to compile a directory of numerous scholarly papers, each labeled with its DOI. The entire directory can be accessed via the following alias: <code>/ipns/hub.standard-template-construct.org</code>
 8 |   p Users have the ability to pin this directory, locate a paper by its DOI, or directly retrieve papers from the directory.
 9 |   h5 Usage
10 |   h6 Retrieving Files Through IPFS
11 |   p All DOIs are urlencoded to accommodate special characters in the name. Below is an example of how to retrieve a paper using the Kubo CLI:
12 |   pre
13 |     code ipfs get /ipns/hub.standard-template-construct.org/10.1145%2F15922.15895.pdf
14 |   h6 Retrieving Files Through HTTP API
15 |   p The names are urlencoded twice due to the HTTP server decoding urlencoded URLs independently before passing them to the IPFS network:
16 |   pre
17 |     code
18 |       | export IPNS_NAME=/ipns/hub.standard-template-construct.org
19 |       | export GATEWAY_URL=http://localhost:8080
20 |       | curl -L $GATEWAY_URL/$IPNS_NAME/10.1145%252F15922.15895.pdf
21 | </template>
22 | <script lang="ts">
23 | import { defineComponent } from 'vue'
// Static help page describing the STC Hub API; the only logic is setting the tab title.
export default defineComponent({
  name: 'StcHubApiView',
  created () {
    document.title = 'STC Hub API - Help - STC'
  }
})
30 | </script>
31 | 


--------------------------------------------------------------------------------
/tgbot/handlers/close.py:
--------------------------------------------------------------------------------
 1 | import asyncio
 2 | import time
 3 | 
 4 | from telethon import events
 5 | 
 6 | from library.telegram.base import RequestContext
 7 | from library.telegram.utils import safe_execution
 8 | from tgbot.translations import t
 9 | 
10 | from .base import BaseCallbackQueryHandler
11 | 
12 | 
def is_earlier_than_2_days(message):
    """
    Tell whether `message` was sent less than two days ago.

    The threshold is 48 hours minus a 10-second margin.

    Args:
        message: object with a `date` attribute holding a `datetime`
            (or a falsy value when the date is unknown).

    Returns:
        True when the message is younger than the threshold, False when it is
        older or when it carries no date.
    """
    if not message.date:
        return False
    # `datetime.timestamp()` honours tzinfo. The previous
    # `time.mktime(date.timetuple())` interpreted the fields of a tz-aware
    # datetime as local time, skewing the age by the UTC offset.
    age_seconds = time.time() - message.date.timestamp()
    return age_seconds < 48 * 60 * 60 - 10
16 | 
17 | 
class CloseHandler(BaseCallbackQueryHandler):
    """
    Callback handler for `/close` buttons.

    Deletes the message carrying the button and, when it is recent enough,
    the message it replies to. Messages that fail the age check are not
    deleted; the user gets a notice instead.
    """
    filter = events.CallbackQuery(pattern='^/close(?:_([A-Za-z0-9]+))?(?:_([0-9]+))?$')

    async def handler(self, event, request_context: RequestContext):
        raw_session_id = event.pattern_match.group(1)
        # The matched group is decoded when present; a missing group stays as is.
        session_id = raw_session_id.decode() if raw_session_id else raw_session_id
        request_context.add_default_fields(mode='close')

        pending = []
        message = await event.get_message()

        if not message or not is_earlier_than_2_days(message):
            # Nothing to delete: only tell the user, ignoring delivery errors quietly.
            async with safe_execution(is_logging_enabled=False):
                await event.answer(t('DELETION_FORBIDDEN_DUE_TO_AGE'))
        else:
            pending.append(event.answer())
            request_context.statbox(
                action='close',
                message_id=message.id,
                session_id=session_id,
            )
            reply_message = await message.get_reply_message()
            if reply_message and is_earlier_than_2_days(reply_message):
                pending.append(reply_message.delete())
            pending.append(message.delete())
        # Answer and deletions run concurrently.
        await asyncio.gather(*pending)
45 | 


--------------------------------------------------------------------------------
/tgbot/handlers/report.py:
--------------------------------------------------------------------------------
 1 | import asyncio
 2 | 
 3 | from stc_geck.advices import BaseDocumentHolder
 4 | from telethon import events
 5 | 
 6 | from library.telegram.base import RequestContext
 7 | from library.telegram.utils import safe_execution
 8 | 
 9 | from .base import BaseCallbackQueryHandler
10 | 
11 | 
class ReportHandler(BaseCallbackQueryHandler):
    """
    Handles `/r_<cid>` messages: stores a "broken file" vote for the document
    with that CID, thanks the sender and deletes the triggering message.
    """
    filter = events.NewMessage(incoming=True, pattern=r'^(?:@\w+)?\s+\/r_([A-Za-z0-9_-]+)(?:\s+(.*))?$')

    def parse_pattern(self, event: events.ChatAction):
        """Return `(cid, reason)` from the matched pattern; `reason` is the optional trailing group."""
        match = event.pattern_match
        return match.group(1), match.group(2)

    async def handler(self, event: events.ChatAction, request_context: RequestContext):
        cid, reason = self.parse_pattern(event)

        request_context.add_default_fields(mode='report', cid=cid)
        request_context.statbox(action='report')

        summa_client = self.application.summa_client
        document = await summa_client.get_one_by_field_value('nexus_science', 'cid', cid)
        document_holder = BaseDocumentHolder(document)

        await self.application.database.add_vote_broken_file(
            bot_name=self.bot_config['bot_name'],
            user_id=request_context.chat['chat_id'],
            internal_id=document_holder.get_internal_id(),
            cid=cid,
            reason=reason,
        )
        async with safe_execution():
            # Reply and removal of the command message are sent concurrently.
            acknowledgement = event.reply(
                f'Thank you for reporting `{document_holder.get_internal_id()}`. '
                f'Be careful, too many misreports will cause a ban',
            )
            removal = event.delete()
            return await asyncio.gather(acknowledgement, removal)
43 | 


--------------------------------------------------------------------------------
/web/public/favicon-black.svg:
--------------------------------------------------------------------------------
1 | <svg version="1.0" xmlns="http://www.w3.org/2000/svg" width="300.000000pt" height="300.000000pt"
2 |      viewBox="0 0 300.000000 300.000000" preserveAspectRatio="xMidYMid meet">
3 |     <style>
4 |         path { fill: black; }
5 |     </style>
6 |     <g transform="translate(0.000000,300.000000) scale(0.100000,-0.100000)">
7 |         <path d="M1345 2646 c-232 -56 -404 -310 -366 -539 7 -39 22 -97 35 -129 l24 -59 -32 -24 c-58 -45 -150 -114 -293 -220 l-141 -105 -53 27 c-47 25 -64 28 -144 28 -81 0 -95 -3 -144 -30 -173 -95 -217 -325 -90 -468 140 -159 367 -150 487 21 28 40 44 77 53 121 13 63 12 73 -25 196 -5 17 28 45 209 180 119 88 222 164 229 169 10 7 26 0 54 -22 59 -47 144 -89 206 -102 l56 -11 0 -338 0 -337 -50 -18 c-102 -37 -185 -146 -197 -258 -12 -120 59 -245 172 -303 78 -40 184 -42 259 -6 109 54 180 164 181 282 0 56 -5 76 -35 135 -38 75 -105 135 -172 156 l-38 11 0 339 c0 275 2 338 13 338 39 0 151 48 216 91 40 28 76 49 80 47 16 -7 480 -360 490 -372 9 -11 9 -20 2 -35 -16 -29 -13 -130 4 -182 25 -74 83 -140 155 -177 61 -32 70 -34 145 -30 101 5 157 30 219 99 56 63 91 159 81 229 -9 64 -59 162 -104 203 -103 93 -264 101 -383 19 l-45 -31 -246 184 c-135 101 -248 189 -252 194 -3 6 2 25 11 43 28 53 44 128 44 203 0 325 -298 558 -615 481z m100 -298 c15 -32 33 -33 47 -3 16 36 42 28 46 -14 3 -26 7 -32 20 -28 65 21 65 21 59 1 -21 -68 -22 -64 17 -64 28 0 36 -4 36 -18 0 -11 -12 -24 -26 -31 -29 -13 -28 -31 4 -46 12 -5 22 -18 22 -27 0 -14 -8 -18 -35 -18 -39 0 -44 -13 -20 -47 20 -29 -2 -50 -31 -28 -31 24 -44 18 -44 -19 0 -46 -25 -55 -45 -16 -19 36 -30 38 -46 8 -25 -48 -49 -45 -49 8 0 38 -9 42 -46 17 -22 -14 -27 -14 -34 -2 -6 9 -4 22 5 36 22 34 18 41 -22 45 -47 4 -53 22 -14 43 39 19 39 26 0 49 -38 23 -33 40 14 44 40 3 45 15 22 46 -22 29 -1 51 28 31 34 -24 47 -19 47 20 0 27 4 35 18 35 9 0 22 -10 27 -22z"/>
8 |     </g>
9 | </svg>


--------------------------------------------------------------------------------
/library/textutils/html_processing.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | 
 3 | from library.textutils.utils import despace
 4 | 
 5 | 
 6 | def reduce_br(soup_str):
 7 |     soup_str = soup_str.replace("<br>", "<br/>").replace('<p><br/>', '<p>').replace('<br/></p>', '</p>')
 8 |     soup_str = re.sub(r'([^.>])<br/>([^(<br/>)])', r'\g<1> \g<2>', soup_str)
 9 |     soup_str = re.sub(r'(?:<br/>\s*)+([^(<br/>)])', r'<br/><br/>\g<1>', soup_str)
10 |     soup_str = despace(soup_str)
11 |     return soup_str
12 | 
13 | 
14 | def remove_chars(soup_str):
15 |     soup_str = soup_str.replace('\ufeff', '').replace('\r\n', '\n')
16 |     return soup_str
17 | 
18 | 
def process_tags(soup):
    """
    Normalize tag names and strip attributes on every element of `soup`.

    `span` elements are unwrapped, a handful of tags are renamed to a
    canonical form, and `p` elements carrying the `ref` class become `ref`.
    Afterwards every element keeps only its `href` and `class` attributes.
    Returns the same `soup` object, modified in place.
    """
    renames = {
        'em': 'i',
        'italic': 'i',
        'strong': 'b',
        'sec': 'section',
        'disp-formula': 'formula',
    }
    for el in soup.find_all():
        tag = el.name
        if tag == 'span':
            el.unwrap()
        elif tag in renames:
            el.name = renames[tag]
        elif tag == 'p' and 'ref' in el.attrs.get('class', []):
            el.name = 'ref'
        # Whitelist of attributes that survive, in this insertion order.
        el.attrs = {key: el.attrs[key] for key in ('href', 'class') if key in el.attrs}
    return soup
42 | 
43 | 
def headerize_headers(soup):
    """
    Promote paragraphs that contain nothing but a single `b` child to headers.

    The `b` child is renamed to `header` and takes the place of its parent
    `p` element. Returns the same `soup` object, modified in place.
    """
    for el in soup.find_all():
        if el.name != 'p':
            continue
        children = list(el.children)
        if len(children) != 1:
            continue
        only_child = children[0]
        if only_child.name != 'b':
            continue
        only_child.name = 'header'
        el.replace_with(only_child)
    return soup
53 | 


--------------------------------------------------------------------------------
/web/public/favicon.svg:
--------------------------------------------------------------------------------
 1 | <svg version="1.0" xmlns="http://www.w3.org/2000/svg" width="300.000000pt" height="300.000000pt"
 2 |      viewBox="0 0 300.000000 300.000000" preserveAspectRatio="xMidYMid meet">
 3 |     <style>
 4 |         path { fill: #644494; }
 5 |         @media (prefers-color-scheme: light) { path { fill: #644494; } }
 6 |         @media (prefers-color-scheme: dark) { path { fill: #FE9609; } }
 7 |     </style>
 8 |     <g transform="translate(0.000000,300.000000) scale(0.100000,-0.100000)">
 9 |         <path d="M1345 2646 c-232 -56 -404 -310 -366 -539 7 -39 22 -97 35 -129 l24 -59 -32 -24 c-58 -45 -150 -114 -293 -220 l-141 -105 -53 27 c-47 25 -64 28 -144 28 -81 0 -95 -3 -144 -30 -173 -95 -217 -325 -90 -468 140 -159 367 -150 487 21 28 40 44 77 53 121 13 63 12 73 -25 196 -5 17 28 45 209 180 119 88 222 164 229 169 10 7 26 0 54 -22 59 -47 144 -89 206 -102 l56 -11 0 -338 0 -337 -50 -18 c-102 -37 -185 -146 -197 -258 -12 -120 59 -245 172 -303 78 -40 184 -42 259 -6 109 54 180 164 181 282 0 56 -5 76 -35 135 -38 75 -105 135 -172 156 l-38 11 0 339 c0 275 2 338 13 338 39 0 151 48 216 91 40 28 76 49 80 47 16 -7 480 -360 490 -372 9 -11 9 -20 2 -35 -16 -29 -13 -130 4 -182 25 -74 83 -140 155 -177 61 -32 70 -34 145 -30 101 5 157 30 219 99 56 63 91 159 81 229 -9 64 -59 162 -104 203 -103 93 -264 101 -383 19 l-45 -31 -246 184 c-135 101 -248 189 -252 194 -3 6 2 25 11 43 28 53 44 128 44 203 0 325 -298 558 -615 481z m100 -298 c15 -32 33 -33 47 -3 16 36 42 28 46 -14 3 -26 7 -32 20 -28 65 21 65 21 59 1 -21 -68 -22 -64 17 -64 28 0 36 -4 36 -18 0 -11 -12 -24 -26 -31 -29 -13 -28 -31 4 -46 12 -5 22 -18 22 -27 0 -14 -8 -18 -35 -18 -39 0 -44 -13 -20 -47 20 -29 -2 -50 -31 -28 -31 24 -44 18 -44 -19 0 -46 -25 -55 -45 -16 -19 36 -30 38 -46 8 -25 -48 -49 -45 -49 8 0 38 -9 42 -46 17 -22 -14 -27 -14 -34 -2 -6 9 -4 22 5 36 22 34 18 41 -22 45 -47 4 -53 22 -14 43 39 19 39 26 0 49 -38 23 -33 40 14 44 40 3 45 15 22 46 -22 29 -1 51 28 31 34 -24 47 -19 47 20 0 27 4 35 18 35 9 0 22 -10 27 -22z"/>
10 |     </g>
11 | </svg>


--------------------------------------------------------------------------------
/web/src/App.vue:
--------------------------------------------------------------------------------
 1 | <template lang="pug">
 2 | div.d-flex.flex-column.min-vh-100.w-100
 3 |   header
 4 |     nav.navbar.navbar-expand
 5 |       .container-fluid
 6 |         router-link.ms-2.navbar-brand(to="/")
 7 |           img.favicon-inversion-filter(src="/favicon-black.svg" alt="" width="36" height="36")
 8 |         .navbar-nav.me-auto
 9 |         .navbar-nav
10 |           router-link.nav-link(to="/bookmarks" data-bs-toggle="tooltip" data-bs-placement="top" title="Bookmarks")
11 |             div.text-center
12 |               i.bi.bi-bookmark
13 |           router-link.nav-link(to="/help" data-bs-toggle="tooltip" data-bs-placement="top" title="Help")
14 |             div.text-center
15 |               i.bi.bi-question-circle-fill
16 |   .mb-3
17 |     router-view
18 |   footer.footer.mt-auto.text-end.small.mb-3
19 |     .container.small
20 |       div {{ get_label ("stamp") }}
21 |       a(href="https://github.com/nexus-stc/stc/issues/new?assignees=&labels=&projects=&template=bug_report.md&title=") {{ get_label("report_a_bug") }}
22 |       span &nbsp;|&nbsp;
23 |       a(href="/#/help") {{ get_label("help") }}
24 | </template>
25 | 
26 | <script lang="ts">
27 | import {defineComponent} from 'vue'
28 | import { RouterLink, RouterView } from 'vue-router'
29 | import {SearchProviderStatus} from "@/services/search/search-provider";
30 | 
// Root component: registers the router components used by the template.
// NOTE(review): the template calls get_label(), which is neither imported nor
// registered in this component — presumably it is provided globally; confirm.
export default defineComponent({
  name: 'App',
  components: {
    RouterLink,
    RouterView
  },
})
38 | </script>
39 | <style scoped lang="scss">
40 | .nav-link div {
41 |   font-size: 75%;
42 | }
43 | .nav-link i {
44 |   font-size: 200%;
45 | }
46 | .logo {
47 |   background: linear-gradient(90deg, #644494, #FE9609);
48 |   -webkit-background-clip: text;
49 |   -webkit-text-fill-color: transparent;
50 | }
51 | @media (prefers-color-scheme: dark) {
52 |   .logo {
53 |     background: linear-gradient(90deg, #FE9609, #644494);
54 |     -webkit-background-clip: text;
55 |     -webkit-text-fill-color: transparent;
56 |   }
57 | }
58 | </style>
59 | 


--------------------------------------------------------------------------------
/web/src/views/StcBoxView.vue:
--------------------------------------------------------------------------------
 1 | <template lang="pug">
 2 | .container
 3 |   h3 {{ get_label("stc_box") }}
 4 |   p You can set up STC on a small computer to act as a personal or group library.
 5 |   h5 Minimum Requirements
 6 |   p For testing, we used an Orange PI 5 with 16GB of RAM, a Sandisk 128GB MicroSD, and a 16TB Seagate Exos HDD attached externally by a USB cord, all of which demonstrated strong performance.
 7 |   p In general, you can use any computer with the following specs
 8 |   ul
 9 |     li A 4-core arm64 or x86-based CPU
10 |     li 16GB RAM
11 |     li 256GB+ of storage for the metadata database, 10TB+ for the entire dataset
  p If you are using an Orange PI, pay attention to the power supply. We experienced issues when the HDD was attached through USB 3.0, but everything worked fine after switching to the left USB 2.0 port.
13 |   h5 System Configuration
14 |   h6 Burning the MicroSD Card
15 |   p Start by obtaining an OS image suitable for your hardware. Orange PI images can be downloaded from <a href="http://www.orangepi.org/html/serviceAndSupport/index.html" target="_blank">their official site</a>.
16 |   p Burn the image file (usually *.iso or *.img) onto your MicroSD card. On MacOS and Linux, this can be done using the <code>dd</code> utility (set <code>if</code> and <code>of</code> to correspond to your file and disk respectively):
17 |   pre
18 |     code
19 |       | sudo dd if=file.img of=/dev/sde status=progress conv=fsync
20 |   h6 IPFS Configuration
21 |   p Follow Steps 1 and 2 from the <a href="#/help/replicate">replication guide</a> to set up IPFS.
22 |   h5 Start Using It!
23 |   p <a href="#/help/install-ipfs">Install IPFS</a> on any desktop in the same LAN, and then open <a href="https://libstc.cc">STC</a>.
24 | </template>
25 | <script lang="ts">
26 | import { defineComponent } from 'vue'
27 | import {get_label} from "@/translations";
// Help page describing how to run STC on a small single-board computer.
export default defineComponent({
  name: 'StcBoxView',
  created () {
    document.title = `${get_label("stc_box")} - Help - STC`
  },
  // The template heading calls get_label(), so the imported helper has to be
  // exposed on the component instance (as the other help views do).
  methods: { get_label }
})
34 | </script>
35 | 


--------------------------------------------------------------------------------
/library/user_manager/user_manager.py:
--------------------------------------------------------------------------------
 1 | import time
 2 | 
 3 | 
 4 | class UserManager:
 5 |     def __init__(self):
 6 |         self.search_times = {}
 7 |         self.search_ban_times = {}
 8 |         self.tasks = set()
 9 |         self.limits = {}
10 | 
11 |     def add_search_time(self, user_id: str, search_time: float):
12 |         current_time = time.time()
13 |         search_times = self.search_times.get(user_id, [])
14 |         search_times.append(search_time)
15 |         counter = 0
16 | 
17 |         for i in reversed(search_times):
18 |             if i > current_time - 10:
19 |                 counter = counter + 1
20 |                 if counter > 5:
21 |                     self.search_ban_times[user_id] = current_time + int(60)
22 |                     del self.search_times[user_id]
23 |                     return
24 |             else:
25 |                 if counter == 1:
26 |                     del self.search_times[user_id]
27 |                     return
28 | 
29 |         if len(search_times) > 20:
30 |             self.search_ban_times[user_id] = current_time + int(120)
31 |             del self.search_times[user_id]
32 |             return
33 | 
34 |         self.search_times[user_id] = search_times
35 | 
36 |     def check_search_ban_timeout(self, user_id: str):
37 |         ban_time = self.search_ban_times.get(user_id)
38 |         if ban_time:
39 |             timeout = int(ban_time - time.time())
40 |             if timeout > 0:
41 |                 return timeout
42 |             del self.search_ban_times[user_id]
43 | 
44 |     def add_task(self, user_id, id):
45 |         self.tasks.add((user_id, id))
46 |         self.limits[user_id] = self.limits.get(user_id, 0) + 1
47 | 
48 |     def remove_task(self, user_id, id):
49 |         self.tasks.remove((user_id, id))
50 |         self.limits[user_id] = self.limits.get(user_id, 1) - 1
51 | 
52 |     def has_task(self, user_id, id):
53 |         return (user_id, id) in self.tasks
54 | 
55 |     def hit_limits(self, user_id):
56 |         return self.limits.get(user_id, 0) >= 3
57 | 


--------------------------------------------------------------------------------
/tgbot/promotions/promotions.yaml:
--------------------------------------------------------------------------------
 1 | ---
 2 | 
 3 | promotions:
 4 |   - texts:
 5 |       en: 💬 The victory of humanity is inevitable
 6 |     weight: 1
 7 |   - texts:
 8 |       en: 💬 Shall build Standard Template Construct
 9 |     weight: 1
10 |   - texts:
11 |       en: 💬 Gaining knowledge is the only purpose of life
12 |     weight: 1
13 |   - texts:
14 |       en: 💬 Knowledge cannot belong
15 |     weight: 1
16 |   - texts:
17 |       en: 💬 Obey the path of discovery
18 |     weight: 1
19 |   - texts:
20 |       en: 💬 Research is the only and ultimate goal
21 |     weight: 1
22 |   - texts:
23 |       en: 💬 Intellectual property is not a valid form of property
24 |     weight: 1
25 |   - texts:
26 |       en: ⤴️ Stay tuned with us at @{related_channel}, [Twitter]({twitter_contact_url}) and [Reddit]({reddit_url})
27 |       es: ⤴️ Mantente en contacto con nosotros en @{related_channel}, [Twitter]({twitter_contact_url}) y [Reddit]({reddit_url})
28 |       it: ⤴️ Resta aggiornato con noi su @{related_channel}, [Twitter]({twitter_contact_url}) e [Reddit]({reddit_url})
29 |       pb: ⤴️ Fique ligado conosco em @{related_channel}, [Twitter]({twitter_contact_url}) e [Reddit]({reddit_url})
30 |       ru: ⤴️ Оставайся на связи с нами на @{related_channel}, [Twitter]({twitter_contact_url}) и в [Reddit]({reddit_url})
31 |     weight: 5
32 |   - texts:
33 |       en: 🧬 Join [Nexus Communities](https://t.me/+fPQIvxQmJGQ3MzU8), the spaces to discuss science
34 |     weight: 50
35 |   - texts:
36 |       en: 🔥 Join [our Reddit](https://www.reddit.com/r/science_nexus) to learn more about Nexus/STC
37 |     weight: 50
38 |   - texts:
39 |       en: 🐦 Subscribe to our [Twitter](https://twitter.com/the_superpirate) to receive news first
40 |     weight: 50
41 |   - texts:
42 |       en: ✉️ Subscribe to our [Telegram](https://t.me/nexus_search) to stay with us
43 |     weight: 50
44 |   - texts:
45 |       en: ⤴️ Try [Standard Template Construct](https://libstc.cc) library
46 |       ru: ⤴️ Заходи в библиотеку [Стандартных Шаблонных Конструкций](https://libstc.cc)
47 |     weight: 5
48 | 


--------------------------------------------------------------------------------
/web/src/views/DonateView.vue:
--------------------------------------------------------------------------------
 1 | <template lang="pug">
 2 | .container
 3 |   h3 {{ get_label("donate") }}
 4 |   p(v-html='get_label("donate_content")')
 5 |   .col-lg-6
 6 |     ul.nav.nav-tabs(id="currencies-tab" role="tablist")
 7 |       li.nav-item(role="presentation")
 8 |         button.nav-link.active(id="btc-tab" data-bs-toggle="tab" data-bs-target="#btc-tab-pane" type="button" role="tab" aria-controls="btc-tab-pane" aria-selected="true") BTC
 9 |       li.nav-item(role="presentation")
10 |         button.nav-link(id="eth-tab" data-bs-toggle="tab" data-bs-target="#eth-tab-pane" type="button" role="tab" aria-controls="eth-tab-pane" aria-selected="false") ETH
11 |       li.nav-item(role="presentation")
12 |         button.nav-link(id="xmr-tab" data-bs-toggle="tab" data-bs-target="#xmr-tab-pane" type="button" role="tab" aria-controls="xmr-tab-pane" aria-selected="false") XMR
13 | 
14 |     .tab-content.mt-3(id="currencies-tab-content")
15 |       .tab-pane.fade.show.active(id="btc-tab-pane" role="tabpanel" aria-labelledby="btc-tab")
16 |         p
17 |           code 357vJAFsYeCtLU36MYEgaDueg34rr5ajCy
18 |         p
19 |           img.favicon-inversion-filter.img-fluid(src="@/assets/btc.svg")
20 |       .tab-pane.fade(id="eth-tab-pane" role="tabpanel" aria-labelledby="eth-tab")
21 |         p
22 |           code 0x199bECe965e4e1e2fE3065d3F551Ebe8520AC555
23 |         p
24 |           img.favicon-inversion-filter.img-fluid(src="@/assets/eth.svg")
25 |       .tab-pane.fade(id="xmr-tab-pane" role="tabpanel" aria-labelledby="xmr-tab")
26 |         p
27 |           code 42HZx5Cg1uQ2CtCrq7QabP23BN7gBrGu6U6QumkMmR4bKS61gcoP8xyNzP5cJCbjac9yaWFhLsDmM3adMWyBKBXn1d9WiUb
28 |         p
29 |           img.favicon-inversion-filter.img-fluid(src="@/assets/xmr.svg")
30 | </template>
31 | <script lang="ts">
32 | import { defineComponent } from 'vue'
33 | 
34 | import { get_label } from '../translations'
// Donation page: the wallet tabs are static markup, so the component only
// sets the tab title and exposes the translation helper to the template.
export default defineComponent({
  name: 'DonateView',
  created () {
    document.title = `${get_label("donate")} - Help - STC`
  },
  methods: { get_label }
})
42 | </script>
43 | 


--------------------------------------------------------------------------------
/web/vite.config.ts:
--------------------------------------------------------------------------------
 1 | import { fileURLToPath, URL } from 'node:url'
 2 | 
 3 | import react from '@vitejs/plugin-react'
 4 | import vue from '@vitejs/plugin-vue'
 5 | import { defineConfig } from 'vite'
 6 | import topLevelAwait from 'vite-plugin-top-level-await'
 7 | import wasm from 'vite-plugin-wasm'
 8 | import vuePugPlugin from 'vue-pug-plugin'
 9 | 
10 | import summa_config from './summa-config.json'
11 | 
12 | // https://vitejs.dev/config/
// Vite configuration for the web client: Vue SFCs with pug templates, WASM
// modules (also inside workers) and dev-server proxies to an IPFS gateway.
export default defineConfig({
  // Empty base, so generated asset URLs carry no fixed absolute prefix.
  base: '',
  build: {
    rollupOptions: {
      input: {
        index: './index.html'
      },
      // NOTE(review): `name` looks like a file-name pattern here; Rollup's
      // file naming option is `entryFileNames` — confirm what was intended.
      output: [
        {
          name: 'assets/[name].[hash].js'
        }
      ]
    },
    target: 'esnext'
  },
  plugins: [
    // NOTE(review): the React plugin is pointed at `.vue` files — confirm this is deliberate.
    react({
      include: '**/*.vue'
    }),
    vue({
      template: {
        preprocessOptions: {
          // 'preprocessOptions' is passed through to the pug compiler
          plugins: [vuePugPlugin]
        }
      }
    }),
    wasm(),
    topLevelAwait(),
  ],
  // Workers are built as ES modules and get their own WASM plugin instance.
  worker: {
    format: 'es',
    plugins: [wasm()]
  },
  resolve: {
    alias: {
      // '@' resolves to ./src and '~' to ./node_modules, relative to this file.
      '@': fileURLToPath(new URL('./src', import.meta.url)),
      '~': fileURLToPath(new URL('./node_modules', import.meta.url))
    },
    preserveSymlinks: true
  },
  server: {
    fs: {
      // Allow serving files from one level up to the project root
      allow: ['..']
    },
    // Dev-server only: both proxies forward to the IPFS HTTP gateway from
    // summa-config.json after stripping the local path prefix.
    proxy: {
      '^/data': {
        target: `${summa_config.ipfs_http_base_url}/ipns/standard-template-construct.org/data`,
        changeOrigin: true,
        secure: false,
        rewrite: (path) => path.replace(/^\/data/, ''),
      },
      '^/images/wiki': {
        target: `${summa_config.ipfs_http_base_url}/ipns/en.wikipedia-on-ipfs.org/I`,
        changeOrigin: true,
        secure: false,
        rewrite: (path) => path.replace(/^\/images\/wiki/, ''),
      }
    }
  }
})


--------------------------------------------------------------------------------
/tgbot/handlers/roll.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | import time
 3 | 
 4 | from telethon import events
 5 | 
 6 | from library.telegram.base import RequestContext
 7 | from tgbot.views.telegram.base_holder import BaseTelegramDocumentHolder
 8 | 
 9 | from .base import BaseHandler
10 | 
11 | 
class RollHandler(BaseHandler):
    """Handle `/roll [query]`: reply with one sampled document matching the query."""

    filter = events.NewMessage(incoming=True, pattern=re.compile(r'^/roll(?:@\w+)?(.*)?$', re.DOTALL))
    is_group_handler = True

    async def handler(self, event: events.ChatAction, request_context: RequestContext):
        """Search with the reservoir-sampling collector and show the single hit.

        Nothing is sent back when the search returns no documents.
        """
        started_at = time.time()

        request_context.add_default_fields(mode='roll', session_id=self.generate_session_id())
        # Everything after the command (and optional @botname) is the query.
        string_query = event.pattern_match.group(1).strip()

        query, query_traits = self.application.search_request_builder.process(
            string_query,
            is_fieldnorms_scoring_enabled=False,
            collector='reservoir_sampling',
            limit=1,
            default_query_language=request_context.chat['language'],
        )
        documents = await self.application.summa_client.search_documents(query)
        if not documents:
            return

        language = query_traits.query_language
        holder = BaseTelegramDocumentHolder(documents[0])
        promo = self.application.promotioner.choose_promotion(language)

        # Document card followed by two line breaks and the promotion text.
        view_builder = holder.view_builder(language)
        view_builder = view_builder.add_view(bot_name=request_context.bot_name)
        view_builder = view_builder.add_new_line(2)
        view = view_builder.add(promo, escaped=True).build()

        buttons_builder = holder.buttons_builder(language)
        if request_context.is_group_mode():
            buttons_builder.add_remote_download_button(bot_name=request_context.bot_name)
        else:
            buttons_builder.add_download_button()
            buttons_builder.add_close_button()

        request_context.statbox(action='show', duration=time.time() - started_at)
        await event.respond(view, buttons=buttons_builder.build(), link_preview=True)
46 | 


--------------------------------------------------------------------------------
/geck/stc_geck/utils.py:
--------------------------------------------------------------------------------
 1 | import asyncio
 2 | import logging
 3 | import os
 4 | import re
 5 | import socket
 6 | import tempfile
 7 | from urllib.parse import quote
 8 | 
 9 | import ipfs_hamt_directory_py
10 | 
# Runs of characters that are neither whitespace nor word characters.
NON_ALNUMWHITESPACE_REGEX = re.compile(r'([^\s\w])+')
# Runs of whitespace of any kind.
MULTIWHITESPACE_REGEX = re.compile(r"\s+")


def cast_string_to_single_string(s):
    """Collapse arbitrary text into one dash-separated token.

    Punctuation runs become a space, whitespace runs are squeezed to a single
    space, the ends are trimmed, and the remaining spaces are replaced by '-'.
    """
    without_punctuation = NON_ALNUMWHITESPACE_REGEX.sub(' ', s)
    single_spaced = MULTIWHITESPACE_REGEX.sub(' ', without_punctuation)
    trimmed = single_spaced.strip()
    return trimmed.replace(' ', '-')
19 | 
20 | 
async def create_car(output_car, documents, limit, name_template) -> str:
    """Write a listing of documents to a temp file and build a CAR from it.

    Args:
        output_car: destination passed through to
            `ipfs_hamt_directory_py.from_file`.
        documents: async iterable of dict-like documents; each must have a
            'cid' key and may have 'doi', 'md5', 'title', 'filesize' and
            'metadata' (with 'extension').
        limit: maximum number of documents to list.
        name_template: `str.format` template that may reference `title`,
            `id`, `md5`, `doi` and `extension`.

    Returns:
        Whatever `ipfs_hamt_directory_py.from_file` returns (annotated as str).
    """
    with tempfile.TemporaryDirectory() as td:
        input_data = os.path.join(td, 'input_data.txt')
        with open(input_data, 'wb') as f:
            async for document in documents:
                if limit <= 0:
                    break
                id_ = document.get('doi') or document.get('md5')
                # A missing, None or empty title falls back to the id instead
                # of crashing in the regex or yielding an empty item name.
                title = document.get('title')
                item_name = name_template.format(
                    title=cast_string_to_single_string(title) if title else id_,
                    id=id_,
                    md5=document.get('md5'),
                    doi=document.get('doi'),
                    extension=document.get('metadata', {}).get('extension', 'pdf'),
                )
                # One line per document: "<url-quoted name> <cid> <filesize>".
                f.write(b' '.join((
                    quote(item_name, safe='').encode(),
                    document['cid'].encode(),
                    str(document.get('filesize') or 0).encode(),
                )))
                f.write(b'\n')
                limit -= 1
        # Build the CAR in the default executor so the event loop is not
        # blocked; the temp directory is also handed over as the third argument.
        return await asyncio.get_running_loop().run_in_executor(
            None, ipfs_hamt_directory_py.from_file, input_data, output_car, td,
        )
46 | 
47 | 
def is_endpoint_listening(endpoint):
    """Return True when a TCP connection to `endpoint` can be established.

    Args:
        endpoint: string of the form 'host:port' (IPv4 / host name only, since
            an AF_INET socket is used).

    Returns:
        True if `connect_ex` reports success, False otherwise, including when
        the host name cannot be resolved (which is logged to the 'warning'
        logger).

    Raises:
        ValueError: if `endpoint` is not exactly 'host:port' or the port is
            not an integer.
    """
    # Parse before creating the socket so malformed input cannot leak one.
    ip, port = endpoint.split(':')
    # The context manager closes the socket on every path, including errors.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
        try:
            return sock.connect_ex((ip, int(port))) == 0
        except socket.gaierror as e:
            logging.getLogger('warning').warning({'action': 'warning', 'error': str(e)})
            return False
58 | 


--------------------------------------------------------------------------------
/web/src/views/DocumentView.vue:
--------------------------------------------------------------------------------
 1 | <template lang="pug">
 2 | .container
 3 |   loading-spinner(v-if="is_loading" style="margin-top: 140px" :label="get_label('loading_document') + '...'")
 4 |   connectivity-issues-view(v-else-if="is_loading_failed")
 5 |   div(v-else-if="not_found") Not found
 6 |   document(v-else-if="document" :document="document")
 7 | </template>
 8 | 
 9 | <script lang="ts">
10 | import { defineComponent } from 'vue'
11 | 
12 | import ConnectivityIssuesView from '@/components/ConnectivityIssues.vue'
13 | import LoadingSpinner from '@/components/LoadingSpinner.vue'
14 | import Document from "@/components/Document.vue";
15 | import DocumentButtons from "@/components/DocumentButtons.vue";
16 | import TagsList from "@/components/TagsList.vue";
17 | import ReferencesList from "@/components/ReferencesList.vue";
18 | 
// Page component that loads a single document by the `id` prop and renders it.
// `search_service` and `index_name` are read from `this`; they are not declared
// in this component (presumably injected globally — confirm).
export default defineComponent({
  name: 'DocumentView',
  components: {
    ConnectivityIssuesView,
    DocumentButtons,
    LoadingSpinner,
    ReferencesList,
    TagsList,
    Document
  },
  props: {
    id: {
      type: String,
      required: true
    }
  },
  data () {
    return {
      document: undefined,
      is_loading: false,
      is_loading_failed: false,
      not_found: false,
    }
  },
  watch: {
    // Reload whenever the route changes.
    $route () {
      void this.submit()
    }
  },
  async created () {
    await this.submit()
  },
  methods: {
    // Run a one-result search for `id` and store the parsed document.
    async submit () {
      try {
        this.is_loading = true
        // Clear a failure left over from a previous attempt; otherwise the
        // template keeps showing the connectivity error after a later
        // successful load, because that branch is checked before the others.
        this.is_loading_failed = false
        const collector_outputs = await this.search_service.search(this.id, {
          page: 1,
          page_size: 1,
          index_name: this.index_name
        })
        const scored_documents = collector_outputs[0].collector_output.documents.scored_documents
        if (scored_documents.length === 0) {
          this.not_found = true;
          return;
        }
        this.not_found = false;
        // The stored document is a JSON string.
        this.document = JSON.parse(scored_documents[0].document)
        document.title = `${this.document.title} - STC`
      } catch (e) {
        this.is_loading_failed = true
      } finally {
        this.is_loading = false
      }
    }
  }
})
76 | </script>
77 | 


--------------------------------------------------------------------------------
/tgbot/handlers/q.py:
--------------------------------------------------------------------------------
 1 | import asyncio
 2 | import re
 3 | 
 4 | from bs4 import BeautifulSoup
 5 | from telethon import events
 6 | 
 7 | from library.telegram.base import RequestContext
 8 | from library.telegram.common import close_button
 9 | from library.textutils.utils import remove_markdown
10 | 
11 | from ..translations import t
12 | from .base import BaseHandler
13 | from ..views.telegram.common import encode_query_to_deep_link
14 | 
15 | 
class QHandler(BaseHandler):
    """Handle `/q <question>`: semantic search answered with chunk references."""

    filter = events.NewMessage(incoming=True, pattern=re.compile(r'^/q(?:@\w+)?(?:\s+(.*))?$', re.DOTALL))
    is_group_handler = True

    async def handler(self, event: events.ChatAction, request_context: RequestContext):
        """Reply with up to three scored chunks matching the query.

        When no query text follows the command, a usage hint is sent instead.
        """
        session_id = self.generate_session_id()
        request_context.add_default_fields(mode='cybrex', session_id=session_id)
        request_context.statbox(action='show', sender_id=event.sender_id)

        query = event.pattern_match.group(1)
        if not query:
            text = "Send query for semantic search after `/q`: `/q What is hemoglobin?`"
            return await event.reply(text)
        query = query.strip()

        scored_chunks = await self.application.cybrex_ai.semantic_search(query, n_chunks=3, n_documents=0)
        response = f'🤔 **{query}**'

        references = []
        for scored_chunk in scored_chunks[:3]:
            # Split on the first colon only: the value part may itself contain
            # colons, which previously broke the two-name unpacking.
            field, value = scored_chunk.chunk.document_id.split(':', 1)

            document_id = f'{field}:{value}'
            # Guard against a missing title before calling str methods on it.
            title = (scored_chunk.chunk.title or '').replace('\n', ' - ')
            # Strip any markup from the title.
            text_title = BeautifulSoup(title, 'lxml').get_text(separator='')
            deep_query = encode_query_to_deep_link(document_id, bot_name=request_context.bot_name)
            if deep_query:
                reference = f' - **{text_title}** - [{document_id}]({deep_query})'
            else:
                reference = f' - **{text_title}** - `{document_id}`'
            reference += f'\n**Text:** {remove_markdown(scored_chunk.chunk.text)}'
            references.append(reference)

        references = '\n\n'.join(references)
        if references:
            response += f'\n\n**References:**\n\n{references}'
        return await event.reply(response, buttons=[close_button()])
53 | 


--------------------------------------------------------------------------------
/tgbot/configs/logging.yaml:
--------------------------------------------------------------------------------
 1 | ---
 2 | 
 3 | logging:
 4 |   disable_existing_loggers: false
 5 |   formatters:
 6 |     base:
 7 |       class: izihawa_loglib.formatters.BaseFormatter
 8 |     default:
 9 |       class: izihawa_loglib.formatters.DefaultFormatter
10 |     traceback:
11 |       class: izihawa_loglib.formatters.TracebackFormatter
12 |   handlers:
13 |     console:
14 |       class: logging.StreamHandler
15 |       level: INFO
16 |       stream: 'ext://sys.stderr'
17 |     debug:
18 |       class: izihawa_loglib.handlers.BaseFileHandler
19 |       filename: '{{ log_path }}/debug.log'
20 |       formatter: default
21 |       level: DEBUG
22 |     error:
23 |       class: izihawa_loglib.handlers.BaseFileHandler
24 |       filename: '{{ log_path }}/error.log'
25 |       formatter: default
26 |       level: ERROR
27 |     operation:
28 |       class: izihawa_loglib.handlers.BaseFileHandler
29 |       filename: '{{ log_path }}/operation.log'
30 |       formatter: base
31 |       level: DEBUG
32 |     statbox:
33 |       class: izihawa_loglib.handlers.BaseFileHandler
34 |       filename: '{{ log_path }}/statbox.log'
35 |       formatter: default
36 |       level: INFO
37 |     traceback:
38 |       class: izihawa_loglib.handlers.BaseFileHandler
39 |       filename: '{{ log_path }}/traceback.log'
40 |       formatter: traceback
41 |       level: ERROR
42 |     warning:
43 |       class: izihawa_loglib.handlers.BaseFileHandler
44 |       filename: '{{ log_path }}/warning.log'
45 |       formatter: default
46 |       level: WARNING
47 |   loggers:
48 |     aiobaseclient:
49 |       handlers:
50 |         - error
51 |         - warning
52 |       propagate: false
53 |     chardet:
54 |       handlers:
55 |         - error
56 |       propagate: false
57 |     debug:
58 |       handlers:
59 |         - debug
60 |       propagate: false
61 |     error:
62 |       handlers:
63 |         - console
64 |         - error
65 |         - traceback
66 |         - warning
67 |       propagate: false
68 |     operation:
69 |       handlers:
70 |         - operation
71 |       propagate: false
72 |     statbox:
73 |       handlers:
74 |         - console
75 |         - statbox
76 |       propagate: false
77 |     telethon:
78 |       handlers:
79 |         - error
80 |         - warning
81 |       propagate: false
82 |   root:
83 |     handlers:
84 |       - debug
85 |     level: DEBUG
86 |   version: 1
87 | 


--------------------------------------------------------------------------------
/web/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "summa-web",
 3 |   "version": "0.0.0",
 4 |   "scripts": {
 5 |     "dev": "vite --mode development",
 6 |     "build": "run-p type-check build-only",
 7 |     "preview": "vite preview --port 4173",
 8 |     "build-only": "vite build --config vite.config.ts && vite build --config vite-sw.config.ts",
 9 |     "type-check": "vue-tsc --noEmit",
10 |     "lint": "eslint . --ext .vue,.js,.jsx,.cjs,.mjs,.ts,.tsx,.cts,.mts --fix --ignore-path .gitignore",
11 |     "publish": "bash publi.sh"
12 |   },
13 |   "dependencies": {
14 |     "@grpc/grpc-js": "^1.9.7",
15 |     "@protobuf-ts/grpcweb-transport": "^2.9.1",
16 |     "@vueuse/core": "^10.2.1",
17 |     "@vueuse/rxjs": "^10.2.1",
18 |     "axios": "^1.4.0",
19 |     "bootstrap": "^5.3.0",
20 |     "comlink": "^4.4.1",
21 |     "crypto-js": "^4.1.1",
22 |     "detect-browser": "^5.3.0",
23 |     "dexie": "^3.2.4",
24 |     "epubjs": "^0.3.93",
25 |     "google-protobuf": "^3.21.2",
26 |     "grpc-web": "^1.4.2",
27 |     "hammerjs": "^2.0.8",
28 |     "npm-run-all": "^4.1.5",
29 |     "pdfjs-dist": "^3.11.174",
30 |     "qr-creator": "^1.0.0",
31 |     "summa-wasm": "^0.135.7",
32 |     "vite-plugin-require": "^1.1.11",
33 |     "vite-plugin-top-level-await": "^1.3.1",
34 |     "vite-plugin-wasm": "^3.2.2",
35 |     "vue": "^3.3.4",
36 |     "vue-router": "^4.2.4",
37 |     "zingtouch": "^1.0.6"
38 |   },
39 |   "devDependencies": {
40 |     "@protobuf-ts/plugin": "^2.9.1",
41 |     "@rushstack/eslint-patch": "^1.3.2",
42 |     "@tsconfig/node18": "^18.2.0",
43 |     "@types/node": "^20.4.2",
44 |     "@typescript-eslint/eslint-plugin": "^5.62.0",
45 |     "@vitejs/plugin-react": "^4.0.3",
46 |     "@vitejs/plugin-vue": "^4.2.3",
47 |     "@vue/eslint-config-prettier": "^8.0.0",
48 |     "@vue/eslint-config-typescript": "^11.0.3",
49 |     "@vue/tsconfig": "^0.4.0",
50 |     "bootstrap-icons": "^1.10.5",
51 |     "djvujs-dist": "^0.5.4",
52 |     "eslint": "^8.45.0",
53 |     "eslint-config-airbnb-base": "^15.0.0",
54 |     "eslint-config-standard-with-typescript": "^37.0.0",
55 |     "eslint-plugin-import": "^2.27.5",
56 |     "eslint-plugin-n": "^16.0.1",
57 |     "eslint-plugin-promise": "^6.1.1",
58 |     "eslint-plugin-simple-import-sort": "^10.0.0",
59 |     "eslint-plugin-vue": "^9.15.1",
60 |     "kubo-rpc-client": "^3.0.1",
61 |     "prettier": "^3.0.0",
62 |     "sass": "^1.64.0",
63 |     "ts-node": "^10.9.1",
64 |     "typescript": "^5.1.6",
65 |     "vite": "^4.4.9",
66 |     "vue-pug-plugin": "^2.0.3",
67 |     "vue-tsc": "^1.8.5"
68 |   }
69 | }
70 | 


--------------------------------------------------------------------------------
/tgbot/markdownifytg.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | 
 3 | from markdownify import (
 4 |     MarkdownConverter,
 5 |     abstract_inline_conversion, chomp,
 6 | )
 7 | 
# Matches heading-like tag names: h1..h6, "header" and "title".
html_heading_re = re.compile(r'(h[1-6]|header|title)')
 9 | 
10 | 
class Converter(MarkdownConverter):
    """HTML-to-markdown converter with custom inline markers and placeholders.

    Bold is wrapped in '**', italic/emphasis in '__'; headers and titles are
    rendered as bold lines, horizontal rules are dropped, and formulas, images
    and tables are replaced by emoji placeholders.
    """

    convert_b = abstract_inline_conversion(lambda self: '**')
    convert_i = abstract_inline_conversion(lambda self: '__')
    convert_em = abstract_inline_conversion(lambda self: '__')

    # NOTE: `super().convert_b` below is the base class's bold conversion,
    # not the `convert_b` override defined above.
    def convert_header(self, el, text, convert_as_inline):
        """Render a <header> element as a bold line of its own."""
        return '\n' + super().convert_b(el, text, convert_as_inline) + '\n'

    def convert_hn(self, n, el, text, convert_as_inline):
        """Render any <h1>..<h6> heading as a bold line of its own."""
        return '\n' + super().convert_b(el, text, convert_as_inline) + '\n'

    def convert_hr(self, el, text, convert_as_inline):
        """Drop horizontal rules."""
        return ''

    def convert_title(self, el, text, convert_as_inline):
        """Render a <title> element as bold text followed by a newline."""
        return super().convert_b(el, text, convert_as_inline) + '\n'

    def convert_formula(self, el, text, convert_as_inline):
        """Replace a formula with a placeholder."""
        return '🔢\n'

    def convert_a(self, el, text, convert_as_inline):
        """Render a link as `[text](href)`.

        Links without text produce nothing; links without an `href` produce
        just their text instead of a broken `[text](None)` link.
        """
        # Surrounding whitespace reported by chomp() is discarded.
        # NOTE(review): this can glue a link to neighbouring words — confirm
        # that is intended.
        _prefix, _suffix, text = chomp(text)
        if not text:
            return ''
        href = el.get('href')
        if not href:
            return text
        return f'[{text}]({href})'

    def convert_img(self, el, text, convert_as_inline):
        """Replace an image with a placeholder."""
        return '🖼️\n'

    def convert_table(self, el, text, convert_as_inline):
        """Replace a table with a placeholder."""
        return '🔢\n'
43 | 
44 | 
class SnippetConverter(MarkdownConverter):
    """Converter variant for snippets: only <highlight> stays marked up.

    Highlights are wrapped in '**'; italics, headers, headings and titles are
    reduced to their text; rules vanish; formulas, images and tables become
    emoji placeholders.
    """

    # --- inline elements -------------------------------------------------
    convert_highlight = abstract_inline_conversion(lambda self: '**')
    convert_i = abstract_inline_conversion(lambda self: '')
    convert_header = abstract_inline_conversion(lambda self: '')

    # --- elements reduced to their plain text ------------------------------
    def convert_hn(self, n, el, text, convert_as_inline):
        """Headings of any level contribute only their text."""
        return text

    def convert_title(self, el, text, convert_as_inline):
        """Titles contribute only their text."""
        return text

    # --- elements that are dropped or replaced -----------------------------
    def convert_hr(self, el, text, convert_as_inline):
        """Horizontal rules are removed."""
        return ''

    def convert_formula(self, el, text, convert_as_inline):
        """Formulas are shown as a placeholder."""
        return '🔢\n'

    def convert_table(self, el, text, convert_as_inline):
        """Tables are shown as a placeholder."""
        return '🔢\n'

    def convert_img(self, el, text, convert_as_inline):
        """Images are shown as a placeholder."""
        return '🖼️\n'
67 | 
68 | 
# Shared module-level converter instances; both are built with asterisk
# escaping turned off.
md_converter = Converter(escape_asterisks=False)
highlight_md_converter = SnippetConverter(escape_asterisks=False)
71 | 
72 | 
def md(html, **options):
    """Convert `html` with a fresh `Converter` built from `options`."""
    converter = Converter(**options)
    return converter.convert(html)
75 | 


--------------------------------------------------------------------------------
/library/telegram/session_backend/core_postgres.py:
--------------------------------------------------------------------------------
 1 | from typing import (
 2 |     Any,
 3 |     Union,
 4 | )
 5 | 
 6 | from sqlalchemy.dialects.postgresql import insert
 7 | from telethon.sessions.memory import _SentFileType
 8 | from telethon.tl.types import (
 9 |     InputDocument,
10 |     InputPhoto,
11 | )
12 | 
13 | from .core import AlchemyCoreSession
14 | 
15 | 
class AlchemyPostgresCoreSession(AlchemyCoreSession):
    """Core session whose writes are PostgreSQL `INSERT ... ON CONFLICT DO UPDATE`."""

    def set_update_state(self, entity_id: int, row: Any) -> None:
        """Insert or update the stored update state for `entity_id`."""
        table = self.UpdateState.__table__
        state = {
            'pts': row.pts,
            'qts': row.qts,
            'date': row.date.timestamp(),
            'seq': row.seq,
            'unread_count': row.unread_count,
        }
        with self.engine.begin() as conn:
            statement = insert(table).values(
                session_id=self.session_id, entity_id=entity_id, **state
            )
            # On a primary-key clash only the state columns are overwritten.
            statement = statement.on_conflict_do_update(
                constraint=table.primary_key, set_=state
            )
            conn.execute(statement)

    def process_entities(self, tlo: Any) -> None:
        """Upsert every entity row extracted from `tlo`; no-op when there are none."""
        rows = self._entities_to_rows(tlo)
        if not rows:
            return

        table = self.Entity.__table__
        statement = insert(table)
        # Columns refreshed from the incoming row when the key already exists.
        refreshed = {
            "hash": statement.excluded.hash,
            "username": statement.excluded.username,
            "phone": statement.excluded.phone,
            "name": statement.excluded.name,
        }
        upsert = statement.on_conflict_do_update(constraint=table.primary_key, set_=refreshed)
        with self.engine.begin() as conn:
            parameters = [
                dict(session_id=self.session_id, id=row[0], hash=row[1],
                     username=row[2], phone=row[3], name=row[4])
                for row in rows
            ]
            conn.execute(upsert, parameters)

    def cache_file(self, md5_digest: str, file_size: int,
                   instance: Union[InputDocument, InputPhoto]) -> None:
        """Insert or update the sent-file record for an uploaded document/photo.

        Raises:
            TypeError: if `instance` is neither InputDocument nor InputPhoto.
        """
        if not isinstance(instance, (InputDocument, InputPhoto)):
            raise TypeError("Cannot cache {} instance".format(type(instance)))

        table = self.SentFile.__table__
        file_ref = {'id': instance.id, 'hash': instance.access_hash}
        with self.engine.begin() as conn:
            statement = insert(table).values(
                session_id=self.session_id,
                md5_digest=md5_digest,
                type=_SentFileType.from_type(type(instance)).value,
                file_size=file_size,
                **file_ref,
            )
            statement = statement.on_conflict_do_update(
                constraint=table.primary_key, set_=file_ref
            )
            conn.execute(statement)
57 | 


--------------------------------------------------------------------------------
/tgbot/handlers/view.py:
--------------------------------------------------------------------------------
 1 | import asyncio
 2 | import time
 3 | 
 4 | from telethon import (
 5 |     events,
 6 |     functions,
 7 | )
 8 | from telethon.errors import MessageIdInvalidError
 9 | 
10 | from library.telegram.base import RequestContext
11 | from tgbot.translations import t
12 | from tgbot.views.telegram.base_holder import BaseTelegramDocumentHolder
13 | 
14 | from .base import BaseHandler
15 | 
16 | 
def is_earlier_than_2_days(message):
    """Return True when `message` was sent less than two days (minus 10 s) ago.

    Args:
        message: object with a `date` attribute holding a datetime, or a
            falsy value when the date is unknown.

    Returns:
        False when the message has no date or is older than the window.
    """
    if not message.date:
        return False
    # datetime.timestamp() honours the datetime's own tzinfo. The previous
    # mktime(timetuple()) read an aware datetime as local time, skewing the
    # age by the host's UTC offset.
    age_seconds = time.time() - message.date.timestamp()
    return age_seconds < 2 * 24 * 60 * 60 - 10
20 | 
21 | 
class ViewHandler(BaseHandler):
    """Handle `/v_<cid>` commands by showing the document stored under that CID."""

    filter = events.NewMessage(incoming=True, pattern='^/v_([A-Za-z0-9_-]+)')

    def parse_pattern(self, event: events.ChatAction):
        """Return the CID captured by the command pattern."""
        return event.pattern_match.group(1)

    async def get_message(self, message_id, request_context: RequestContext):
        """Fetch one message by id through the bot's Telegram client."""
        request = functions.messages.GetMessagesRequest(id=[message_id])
        telegram_client = self.application.get_telegram_client(request_context.bot_name)
        response = await telegram_client(request)
        return response.messages[0]

    async def handler(self, event: events.ChatAction, request_context: RequestContext):
        """Swap the command message for a document card.

        A "searching" placeholder is posted first; it is then edited into the
        document view while the original command message is deleted.
        """
        cid = self.parse_pattern(event)

        request_context.add_default_fields(mode='view', cid=cid)
        request_context.statbox(action='view')

        language = request_context.chat['language']

        try:
            placeholder = await event.reply(t("SEARCHING", language))
            document = await self.application.summa_client.get_one_by_field_value(
                'nexus_science', 'links.cid', cid,
            )
            if not document:
                return await event.reply(t("OUTDATED_VIEW_LINK", language))

            holder = BaseTelegramDocumentHolder(document)
            promo = self.application.promotioner.choose_promotion(language)

            # Document card, two line breaks, then the promotion text.
            view_builder = holder.view_builder(language)
            view_builder = view_builder.add_view(bot_name=request_context.bot_name)
            view_builder = view_builder.add_new_line(2).add(promo, escaped=True)

            buttons_builder = holder.buttons_builder(language).add_default_layout(
                bot_name=request_context.bot_name,
                is_group_mode=request_context.is_group_mode(),
            )
            buttons = buttons_builder.build()

            return await asyncio.gather(
                event.delete(),
                placeholder.edit(view_builder.build(), buttons=buttons, link_preview=holder.has_cover()),
            )
        except MessageIdInvalidError:
            return await event.reply(t("VIEWS_CANNOT_BE_SHARED", language))
60 | 


--------------------------------------------------------------------------------
/tgbot/handlers/start.py:
--------------------------------------------------------------------------------
 1 | import asyncio
 2 | 
 3 | from telethon import events
 4 | 
 5 | from library.telegram.base import RequestContext
 6 | from tgbot.translations import t
 7 | from tgbot.views.telegram.common import (
 8 |     DecodeDeepQueryError,
 9 |     decode_deep_query, recode_base64_to_base36,
10 | )
11 | 
12 | from .search import BaseSearchHandler
13 | 
14 | 
class StartHandler(BaseSearchHandler):
    """Handle `/start [payload]`: run the query encoded in the payload, or show help."""

    filter = events.NewMessage(incoming=True, pattern='^/start\\s?(.*)?')

    async def handler(self, event: events.ChatAction, request_context: RequestContext):
        """Decode the `/start` payload into a query and answer it.

        If the payload yields a query, the query is posted to the chat, a
        "searching" placeholder is sent as a reply to it, and the placeholder
        is then edited into the search widget while the original `/start`
        message is deleted. Otherwise the help text is sent.
        """
        raw_query = event.pattern_match.group(1)
        string_query = None

        request_context.statbox(action='start', mode='start', text=event.text)

        try:
            # First interpretation: the payload is an encoded deep query.
            string_query = decode_deep_query(raw_query)
        except DecodeDeepQueryError as e1:
            try:
                # Fallback: recode the payload from base64 to base36 and use
                # it as a CID lookup.
                cid = recode_base64_to_base36(raw_query)
                string_query = f'links.cid:{cid}'
            except DecodeDeepQueryError as e2:
                # Neither interpretation worked: log both failures and fall
                # through to the help branch (string_query stays None).
                request_context.error_log(e1, mode='start', raw_query=raw_query)
                request_context.error_log(e2, mode='start', raw_query=raw_query)

        if string_query:
            request_context.statbox(action='query', mode='start', query=string_query)
            # Post the decoded query into the chat, then reply to it with a
            # placeholder that is edited once the widget is ready.
            request_message = await self.application.get_telegram_client(request_context.bot_name).send_message(event.chat, string_query)
            prefetch_message = await request_message.reply(
                t("SEARCHING", request_context.chat['language']),
            )
            try:
                text, buttons, link_preview = await self.setup_widget(
                    request_context=request_context,
                    string_query=string_query,
                    is_shortpath_enabled=True,
                )
                edit_action = self.application.get_telegram_client(request_context.bot_name).edit_message(
                    request_context.chat['chat_id'],
                    prefetch_message.id,
                    text,
                    buttons=buttons,
                    link_preview=link_preview,
                )
                # Delete the original /start message and fill in the
                # placeholder concurrently.
                await asyncio.gather(
                    event.delete(),
                    edit_action,
                )
            except Exception:
                # Do not leave a dangling placeholder behind; the error is
                # re-raised for the caller.
                await prefetch_message.delete()
                raise
        else:
            request_context.statbox(action='show', mode='start')
            await event.reply(t('HELP', request_context.chat['language']))
63 | 


--------------------------------------------------------------------------------
/web/src/views/Reader.vue:
--------------------------------------------------------------------------------
 1 | <template lang="pug">
 2 | .container(v-if="error !== undefined")
 3 |   .row
 4 |     .col-md-8.offset-md-2
 5 |       connectivity-issues-view(:reason="error")
 6 | .container.col-md-8.offset-md-2(v-else-if="downloading_status !== undefined")
 7 |   loading-spinner(style="margin-top: 140px" :label="downloading_status")
 8 | div(v-else-if="data !== undefined")
 9 |   epub-reader.inversion-filter(v-if="filename.endsWith('epub')" :anchor="anchor" :data="data" v-on:update-anchor="update_anchor")
10 |   djvu-reader(v-else-if="filename.endsWith('djvu')" :anchor="anchor" :data="data" v-on:update-anchor="update_anchor")
11 |   pdf-reader(v-else-if="filename.endsWith('pdf')" :anchor="anchor" :data="data" v-on:update-anchor="update_anchor")
12 | </template>
13 | 
14 | <script lang="ts">
15 | import {defineComponent, type PropType, toRef} from 'vue'
16 | 
17 | import {cid_local_link} from "@/components/BaseDocument.vue";
18 | import router from "@/router";
19 | import {tracked_download} from "@/components/download-progress";
20 | import ConnectivityIssuesView from "@/components/ConnectivityIssues.vue";
21 | import LoadingSpinner from "@/components/LoadingSpinner.vue";
22 | import {get_label} from "@/translations";
23 | import DjvuReader from "@/components/DjvuReader.vue";
24 | import EpubReader from "@/components/EpubReader.vue";
25 | import PdfReader from "@/components/PdfReader.vue";
26 | 
// Reader page: downloads a file addressed by `cid`/`filename` and hands the
// bytes to the reader component matching the file extension.
export default defineComponent({
  name: 'Reader',
  components: {ConnectivityIssuesView, DjvuReader, EpubReader, LoadingSpinner, PdfReader},
  props: {
    cid: {
      type: undefined as PropType<string> | undefined
    },
    filename: {
      type: undefined as PropType<string> | undefined
    },
    anchor: {
      type: undefined as PropType<string> | undefined
    }
  },
  data() {
    return {
      data: undefined,
      downloading_status: get_label("loading") + "...",
      error: undefined,
      mounted: false,
      rendition: undefined,
    }
  },
  async created() {
    const local_link = cid_local_link(this.cid, this.filename);
    try {
      // The `downloading_status` ref is handed to the download helper.
      const status_ref = toRef(this, 'downloading_status');
      const downloaded = await tracked_download([local_link.url], status_ref);
      this.data = downloaded[0];
      this.error = undefined
    } catch (e) {
      // Any failure is surfaced through the connectivity-issues view.
      this.error = e;
    }
  },
  methods: {
    // Keep the current reading position in the URL without adding history entries.
    update_anchor(new_anchor: string) {
      const query = {
        cid: this.cid,
        filename: this.filename,
        anchor: new_anchor,
      };
      router.replace({name: 'reader', query})
    },
  }
})
74 | </script>
75 | 


--------------------------------------------------------------------------------
/cybrex/examples/on-the-fly-translation.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "code",
 5 |    "execution_count": null,
 6 |    "metadata": {
 7 |     "collapsed": true
 8 |    },
 9 |    "outputs": [],
10 |    "source": [
11 |     "from transformers import MBartForConditionalGeneration, MBart50TokenizerFast\n",
12 |     "\n",
13 |     "model = MBartForConditionalGeneration.from_pretrained(\"facebook/mbart-large-50-many-to-many-mmt\")\n",
14 |     "tokenizer = MBart50TokenizerFast.from_pretrained(\"facebook/mbart-large-50-many-to-many-mmt\")"
15 |    ]
16 |   },
17 |   {
18 |    "cell_type": "code",
19 |    "execution_count": null,
20 |    "outputs": [],
21 |    "source": [
22 |     "tokenizer.lang_code_to_id"
23 |    ],
24 |    "metadata": {
25 |     "collapsed": false
26 |    }
27 |   },
28 |   {
29 |    "cell_type": "code",
30 |    "execution_count": null,
31 |    "outputs": [],
32 |    "source": [
33 |     "article = \"Forty-two patients operated on by skin expansion have been contacted after a mean time of 25 months from the last surgery. Two biopsies have been taken from the expanded area of each patient. In 12 patients it has been possible to obtain a similar sampling from the opposite, nonexpanded area of the body. The samples underwent optic microscopy and cell kinetic and DNA content investigations. The epidermal structure of the followed-up skin, compared with the skin of the opposite side of the body, looks normal. The mitotic activity of the epidermal cells has returned to the values of preexpanded skin. The dermis shows a low degree of elastosis and zonal fragmentation of elastic fibers. The hypodermis, where the expander capsule was removed during the last surgery, does not show an accentuated fibrosis.\"\n",
34 |     "tokenizer.src_lang = \"en_XX\"\n",
35 |     "inputs = tokenizer(article, return_tensors=\"pt\")\n",
36 |     "\n",
37 |     "translated_tokens = model.generate(**inputs, forced_bos_token_id=tokenizer.lang_code_to_id[\"ru_RU\"], max_length=1024)\n",
38 |     "tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]"
39 |    ],
40 |    "metadata": {
41 |     "collapsed": false
42 |    }
43 |   },
44 |   {
45 |    "cell_type": "code",
46 |    "execution_count": null,
47 |    "outputs": [],
48 |    "source": [],
49 |    "metadata": {
50 |     "collapsed": false
51 |    }
52 |   }
53 |  ],
54 |  "metadata": {
55 |   "kernelspec": {
56 |    "display_name": "Python 3",
57 |    "language": "python",
58 |    "name": "python3"
59 |   },
60 |   "language_info": {
61 |    "codemirror_mode": {
62 |     "name": "ipython",
63 |     "version": 2
64 |    },
65 |    "file_extension": ".py",
66 |    "mimetype": "text/x-python",
67 |    "name": "python",
68 |    "nbconvert_exporter": "python",
69 |    "pygments_lexer": "ipython2",
70 |    "version": "2.7.6"
71 |   }
72 |  },
73 |  "nbformat": 4,
74 |  "nbformat_minor": 0
75 | }
76 | 


--------------------------------------------------------------------------------
/web/src/database.ts:
--------------------------------------------------------------------------------
 1 | import Dexie from 'dexie'
 2 | 
 3 | import { average } from '@/utils'
 4 | 
 5 | /**
 6 |  * Dexie (IndexedDB) database that stores the user's bookmarks and a bounded
 7 |  * history of search timing metrics.
 8 |  */
 9 | export class UserDb extends Dexie {
10 |   // Keyed by the compound primary key [index_name, query].
11 |   bookmarks!: Dexie.Table<IBookmark, [string, string]>
12 |   // Keyed by the `created_at` timestamp of each sample.
13 |   search_metrics!: Dexie.Table<ISearchMetric, number>
14 | 
15 |   /**
16 |    * @param name name of the database
17 |    * @param version schema version under which the stores are declared
18 |    */
19 |   constructor (name: string, version: number) {
20 |     super(name)
21 |     this.version(version).stores({
22 |       bookmarks: '[index_name+query],created_at',
23 |       search_metrics: 'created_at'
24 |     })
25 |     this.bookmarks.mapToClass(Bookmark)
26 |     this.search_metrics.mapToClass(SearchMetric)
27 |   }
28 | 
29 |   /**
30 |    * Stores a search metric, pruning the table beforehand so that at most the
31 |    * 100 most recent earlier samples survive.
32 |    */
33 |   async add_search_metrics (search_metrics: SearchMetric) {
34 |     return await this.transaction('rw', this.search_metrics, async () => {
35 |       // The primary key is `created_at`, so reverse order is newest-first:
36 |       // skipping 100 rows leaves exactly the oldest surplus rows to delete.
37 |       // (Without `reverse()` the newest rows were the ones being removed.)
38 |       await this.search_metrics.reverse().offset(100).delete()
39 |       return await this.search_metrics.put(search_metrics)
40 |     })
41 |   }
42 | 
43 |   /**
44 |    * Averages `spent` over the `last_n_time` most recent samples.
45 |    * Returns undefined when fewer than `last_n_time` samples are stored.
46 |    */
47 |   async get_average_spent (last_n_time: number) {
48 |     // Read-only work only needs an 'r' transaction.
49 |     return await this.transaction('r', this.search_metrics, async () => {
50 |       const result = await this.search_metrics
51 |         .orderBy('created_at')
52 |         .reverse()
53 |         .limit(last_n_time)
54 |         .toArray()
55 |       if (result.length < last_n_time) {
56 |         return undefined
57 |       }
58 |       return average(result.map((x) => x.spent))
59 |     })
60 |   }
61 | 
62 |   /** Inserts the bookmark, replacing any entry with the same [index_name, query] key. */
63 |   async add_bookmark (bookmark: IBookmark) {
64 |     return await this.transaction('rw', this.bookmarks, async () => {
65 |       return await this.bookmarks.put(bookmark)
66 |     })
67 |   }
68 | 
69 |   /** Returns every bookmark, newest first (ordered by `created_at`). */
70 |   async get_all_bookmarks () {
71 |     return await this.transaction('r', this.bookmarks, async () => {
72 |       return await this.bookmarks.orderBy('created_at').reverse().toArray()
73 |     })
74 |   }
75 | 
76 |   /** Tells whether a bookmark with the given [index_name, query] key exists. */
77 |   async has_bookmark (index_name: string, query: string) {
78 |     return await this.transaction('r', this.bookmarks, async () => {
79 |       return (await this.bookmarks.get([index_name, query])) !== undefined
80 |     })
81 |   }
82 | 
83 |   /** Deletes the bookmark stored under the [index_name, query] key, if any. */
84 |   async delete_bookmark (index_name: string, query: string) {
85 |     await this.transaction('rw', this.bookmarks, async () => {
86 |       await this.bookmarks.delete([index_name, query])
87 |     })
88 |   }
89 | }
64 | 
65 | // Shape of a row in the `bookmarks` table.
66 | interface IBookmark {
67 |   index_name: string
68 |   query: string
69 |   created_at: number
70 | }
70 | 
71 | /** A saved search: an index name plus a query, stamped with its creation time. */
72 | export class Bookmark implements IBookmark {
73 |   index_name: string
74 |   query: string
75 |   created_at: number
76 | 
77 |   constructor (index_name: string, query: string) {
78 |     // Date.now() is in milliseconds; the timestamp is stored in seconds.
79 |     const now_in_seconds = Date.now() / 1000
80 |     this.index_name = index_name
81 |     this.query = query
82 |     this.created_at = now_in_seconds
83 |   }
84 | }
82 | 
83 | // Shape of a row in the `search_metrics` table.
84 | interface ISearchMetric {
85 |   spent: number
86 |   created_at: number
87 | }
87 | 
88 | /** One search timing sample, stamped with its creation time. */
89 | export class SearchMetric implements ISearchMetric {
90 |   spent: number
91 |   created_at: number
92 | 
93 |   constructor (spent: number) {
94 |     // Date.now() is in milliseconds; the timestamp is stored in seconds.
95 |     const now_in_seconds = Date.now() / 1000
96 |     this.spent = spent
97 |     this.created_at = now_in_seconds
98 |   }
99 | }
97 | 
98 | // Module-level database instance: database name 'UserDb', schema version 8.
99 | export const user_db = new UserDb('UserDb', 8)
99 | 


--------------------------------------------------------------------------------
/web/public/safari-pinned-tab.svg:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" standalone="no"?>
 2 | <!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 20010904//EN"
 3 |  "http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd">
 4 | <svg version="1.0" xmlns="http://www.w3.org/2000/svg"
 5 |  width="700.000000pt" height="700.000000pt" viewBox="0 0 700.000000 700.000000"
 6 |  preserveAspectRatio="xMidYMid meet">
 7 | <metadata>
 8 | Created by potrace 1.14, written by Peter Selinger 2001-2017
 9 | </metadata>
10 | <g transform="translate(0.000000,700.000000) scale(0.100000,-0.100000)"
11 | fill="#000000" stroke="none">
12 | <path d="M3319 6205 c-2 -1 -30 -6 -61 -9 -542 -62 -1002 -606 -983 -1161 4
13 | -135 45 -304 114 -479 18 -43 30 -80 29 -81 -2 -2 -83 -63 -180 -137 -208
14 | -157 -150 -113 -567 -424 l-336 -251 -105 53 c-58 29 -124 57 -148 62 -74 17
15 | -349 15 -417 -3 -127 -33 -301 -163 -383 -287 -162 -246 -160 -579 6 -808 84
16 | -115 241 -231 377 -278 166 -58 382 -42 532 39 192 104 331 281 383 488 37
17 | 147 31 229 -29 421 -12 36 -21 70 -21 77 0 22 61 82 164 164 56 43 106 83 111
18 | 87 6 5 109 82 230 172 210 156 253 188 444 328 46 34 88 62 95 62 14 0 57 -27
19 | 188 -115 157 -106 317 -171 491 -200 l37 -6 0 -788 0 -788 -77 -28 c-182 -65
20 | -297 -161 -404 -335 -244 -398 6 -948 476 -1045 77 -16 203 -21 255 -10 14 3
21 | 41 8 60 11 44 8 162 62 218 101 196 135 322 371 322 606 0 106 -15 170 -60
22 | 265 -104 218 -260 360 -453 416 l-59 17 5 767 c2 422 7 777 11 789 6 14 19 23
23 | 44 28 120 21 332 116 499 223 160 103 152 101 210 61 37 -25 447 -332 498
24 | -372 6 -4 93 -71 195 -149 298 -228 313 -239 368 -285 57 -48 60 -59 36 -124
25 | -23 -66 -23 -235 1 -363 10 -49 54 -149 92 -205 80 -122 185 -209 338 -283 89
26 | -42 90 -43 200 -43 286 1 405 43 560 200 151 152 226 326 221 515 -3 106 -21
27 | 169 -88 304 -79 157 -158 243 -289 314 -107 58 -195 77 -344 76 -169 -1 -267
28 | -33 -433 -140 -45 -30 -85 -54 -87 -54 -7 0 -862 643 -1022 768 -162 128 -155
29 | 107 -94 253 34 82 51 144 75 274 7 37 8 247 1 295 -22 162 -42 226 -114 380
30 | -95 202 -272 386 -482 503 -72 40 -225 94 -319 112 -73 15 -321 30 -331 20z
31 | m49 -725 c19 -25 40 -48 47 -53 22 -12 45 3 65 44 27 53 53 66 80 39 11 -11
32 | 21 -24 21 -30 1 -5 3 -14 4 -20 1 -5 3 -20 4 -33 0 -13 7 -31 14 -41 12 -16
33 | 17 -16 90 5 42 13 79 20 83 16 4 -4 0 -26 -9 -49 -9 -24 -19 -62 -23 -85 -6
34 | -40 -5 -43 17 -44 73 -1 123 -9 130 -20 18 -30 -2 -59 -76 -112 -36 -26 -32
35 | -53 14 -81 36 -23 71 -61 71 -78 0 -19 -35 -38 -73 -38 -54 0 -84 -9 -91 -25
36 | -2 -7 9 -39 25 -72 16 -33 27 -67 24 -76 -9 -23 -40 -27 -72 -10 -85 47 -84
37 | 46 -100 31 -8 -8 -17 -39 -20 -69 -7 -65 -18 -89 -41 -89 -23 0 -50 27 -82 79
38 | -13 23 -32 41 -41 41 -9 0 -29 -19 -44 -42 -43 -66 -64 -86 -86 -81 -21 6 -26
39 | 21 -33 101 -3 29 -8 57 -11 62 -8 13 -37 5 -98 -30 l-55 -31 -16 23 c-21 29
40 | -20 31 9 90 14 27 25 56 25 63 0 19 -30 34 -77 40 -56 7 -93 25 -93 45 0 11
41 | 25 33 66 59 76 48 76 54 -7 111 -86 59 -74 90 40 104 78 10 86 31 44 105 -44
42 | 80 -13 110 70 67 54 -27 82 -32 91 -17 3 4 8 35 12 67 4 33 11 65 17 72 18 23
43 | 50 8 85 -38z"/>
44 | </g>
45 | </svg>
46 | 


--------------------------------------------------------------------------------
/web/src/router/index.ts:
--------------------------------------------------------------------------------
  1 | // @ts-nocheck
  2 | import { createRouter, createWebHashHistory } from 'vue-router'
  3 | 
  4 | // Application router: hash-based history with lazily imported views.
  5 | const router = createRouter({
  6 |   history: createWebHashHistory(import.meta.env.BASE_URL),
  7 |   scrollBehavior (to, from, savedPosition) {
  8 |     // Navigating between two help pages scrolls smoothly to the '#hrv' element
  9 |     if (to.path.startsWith("/help") && from.path.startsWith("/help")) {
 10 |       return {
 11 |         el: '#hrv',
 12 |         behavior: 'smooth',
 13 |       }
 14 |     }
 15 |     // always scroll to top
 16 |     return { top: 0 }
 17 |   },
 18 |   routes: [
 19 |     {
 20 |       path: '/',
 21 |       name: 'search',
 22 |       component: () => import('../views/SearchView.vue'),
 23 |       // Query-string parameters are handed to the view as props
 24 |       props: (route) => ({
 25 |         q: route.query.q,
 26 |         // Explicit radix so the value is always read as decimal;
 27 |         // the result is NaN when `p` is missing or not numeric
 28 |         p: Number.parseInt(route.query.p, 10),
 29 |         t: route.query.t,
 30 |         y: route.query.y
 31 |       })
 32 |     },
 33 |     {
 34 |       path: '/reader',
 35 |       name: 'reader',
 36 |       component: () => import('../views/Reader.vue'),
 37 |       props: (route) => ({
 38 |         cid: route.query.cid,
 39 |         filename: route.query.filename,
 40 |         anchor: route.query.anchor
 41 |       })
 42 |     },
 43 |     {
 44 |       path: '/bookmarks',
 45 |       name: 'bookmarks',
 46 |       component: () => import('../views/BookmarksView.vue')
 47 |     },
 48 |     {
 49 |       path: '/help',
 50 |       name: 'help',
 51 |       component: () => import('../views/HelpView.vue'),
 52 |       // Help pages are nested routes of HelpView
 53 |       children: [
 54 |         {
 55 |           path: '',
 56 |           name: 'intro',
 57 |           component: () => import('../views/IntroView.vue')
 58 |         },
 59 |         {
 60 |           path: 'doomsday',
 61 |           name: 'doomsday',
 62 |           component: () => import('../views/DoomsdayView.vue')
 63 |         },
 64 |         {
 65 |           path: 'donate',
 66 |           name: 'donate',
 67 |           component: () => import('../views/DonateView.vue')
 68 |         },
 69 |         {
 70 |           path: 'how-to-search',
 71 |           name: 'how-to-search',
 72 |           component: () => import('../views/HowToSearchView.vue')
 73 |         },
 74 |         {
 75 |           path: 'install-ipfs',
 76 |           name: 'install-ipfs',
 77 |           component: () => import('../views/InstallIpfsView.vue')
 78 |         },
 79 |         {
 80 |           path: 'replicate',
 81 |           name: 'replicate',
 82 |           component: () => import('../views/ReplicateView.vue')
 83 |         },
 84 |         {
 85 |           path: 'stc-box',
 86 |           name: 'stc-box',
 87 |           component: () => import('../views/StcBoxView.vue')
 88 |         },
 89 |         {
 90 |           path: 'stc-hub-api',
 91 |           name: 'stc-hub-api',
 92 |           component: () => import('../views/StcHubApiView.vue')
 93 |         }
 94 |       ]
 95 |     },
 96 |     {
 97 |       // `:id(.+)` lets the id span several path segments
 98 |       path: '/nexus_science/:id(.+)',
 99 |       name: 'document',
100 |       component: () => import('../views/DocumentView.vue'),
101 |       props: true
102 |     }
103 |   ]
104 | })
 98 | 
 99 | export default router
100 | 


--------------------------------------------------------------------------------
/web/src/services/search/search-service.ts:
--------------------------------------------------------------------------------
 1 | // @ts-nocheck
 2 | 
 3 | import {
 4 |     type IndexQuery,
 5 | } from 'summa-wasm'
 6 | import {
 7 |     IpfsSearchProvider,
 8 |     RemoteSearchProvider,
 9 |     type SearchProvider, SearchProviderStatus,
10 | } from "@/services/search/search-provider";
11 | import {ref} from "vue";
12 | import {utils} from "summa-wasm";
13 | 
14 | /**
15 |  * Tries an ordered list of search providers, remembers the first one that
16 |  * sets up successfully and forwards search requests to it.
17 |  */
18 | export class SearchService {
19 |     search_providers: Array<SearchProvider>;
20 |     // Vue ref with the index of the active provider (undefined until one succeeds).
21 |     current_provider_ix: any;
22 |     // Resolves once the initial `setup()` pass has finished.
23 |     init_guard: Promise<void>;
24 |     // Vue ref handed to providers during setup.
25 |     current_init_status: any;
26 |     // Vue ref with the stringified last setup error, if every provider failed.
27 |     loading_failure_reason: any;
28 | 
29 | 
30 |     constructor(logging_level: string) {
31 |         this.current_init_status = ref(undefined);
32 |         const search_providers = [];
33 |         const { ipfs_hostname, ipfs_protocol } = utils.get_ipfs_hostname();
34 |         // Drop the port, if any
35 |         const ipfs_hostname_stripped = ipfs_hostname.split(':')[0]
36 |         // For any other host, first try an API served from its `api.` subdomain
37 |         if (
38 |             ipfs_hostname_stripped !== 'localhost'
39 |             && ipfs_hostname_stripped !== 'ipfs.io'
40 |             && ipfs_hostname_stripped !== 'dweb.link'
41 |         ) {
42 |             search_providers.push(new RemoteSearchProvider(
43 |                 `${ipfs_protocol}//api.${ipfs_hostname_stripped}`,
44 |                 "Local API",
45 |             ));
46 |         }
47 |         search_providers.push(
48 |             new RemoteSearchProvider(
49 |                 "https://api.libstc.cc",
50 |                 "Nebula Nomad Station",
51 |             ),
52 |             new IpfsSearchProvider(this.current_init_status, {logging_level}),
53 |         );
54 |         this.search_providers = search_providers;
55 |         this.current_provider_ix = ref(undefined);
56 |         this.loading_failure_reason = ref(undefined);
57 |         this.init_guard = this.setup();
58 |     }
59 | 
60 |     /**
61 |      * Sets providers up in order and selects the first one whose status is
62 |      * `Succeeded`. If none succeeds, the last thrown error (if any) is stored
63 |      * in `loading_failure_reason`.
64 |      */
65 |     async setup() {
66 |         let last_error = undefined;
67 |         for (const [index, search_provider] of this.search_providers.entries()) {
68 |             try {
69 |                 await search_provider.setup(this.current_init_status);
70 |             } catch (e) {
71 |                 last_error = e;
72 |                 continue;
73 |             }
74 |             if (search_provider.status.value === SearchProviderStatus.Succeeded) {
75 |                 this.current_provider_ix.value = index;
76 |                 return;
77 |             }
78 |         }
79 |         if (last_error !== undefined) {
80 |             this.loading_failure_reason.value = last_error.toString();
81 |         }
82 |     }
83 | 
84 |     /**
85 |      * Switches to the provider at `index` if it is (or becomes) healthy:
86 |      * a provider that was never set up is set up, any other is health-checked.
87 |      * The active provider is left unchanged unless the status is `Succeeded`.
88 |      */
89 |     async change_provider(index: number) {
90 |         const new_provider = this.search_providers[index];
91 |         if (new_provider === undefined) {
92 |             throw new RangeError(`Unknown search provider index: ${index}`);
93 |         }
94 |         if (new_provider.status.value === SearchProviderStatus.NotSetup) {
95 |             // Pass the status ref exactly as the initial setup() pass does
96 |             await new_provider.setup(this.current_init_status);
97 |         } else {
98 |             await new_provider.healthcheck();
99 |         }
100 |         if (new_provider.status.value === SearchProviderStatus.Succeeded) {
101 |             this.current_provider_ix.value = index;
102 |         }
103 |     }
104 | 
105 |     /**
106 |      * Waits for the initial setup, then delegates to the active provider.
107 |      * Throws when no provider could be set up.
108 |      */
109 |     async search(index_query: IndexQuery, options: QueryOptions): Promise<object[]> {
110 |       await this.init_guard;
111 |       const provider = this.search_providers[this.current_provider_ix.value];
112 |       if (provider === undefined) {
113 |         // Without this guard the failure was an opaque TypeError on `undefined.search`
114 |         throw new Error(this.loading_failure_reason.value ?? "No search provider is available");
115 |       }
116 |       return provider.search(index_query, options);
117 |     }
118 | }
88 | 


--------------------------------------------------------------------------------
/tgbot/views/telegram/document_list_widget.py:
--------------------------------------------------------------------------------
 1 | from typing import (
 2 |     List,
 3 |     Optional,
 4 | )
 5 | 
 6 | from telethon import Button
 7 | 
 8 | from library.telegram.common import close_button
 9 | from tgbot.translations import t
10 | from tgbot.views.telegram.base_holder import BaseTelegramDocumentHolder
11 | 
12 | 
13 | class DocumentListWidget:
14 |     def __init__(
15 |         self,
16 |         chat: dict,
17 |         document_holders: List[BaseTelegramDocumentHolder],
18 |         bot_name,
19 |         header: Optional[str] = None,
20 |         promotioner=None,
21 |         has_next: bool = False,
22 |         session_id: Optional[str] = None,
23 |         message_id: Optional[int] = None,
24 |         request_id: Optional[str] = None,
25 |         cmd: str = None,
26 |         page: int = 0,
27 |         page_size: int = 5,
28 |     ):
29 |         self.chat = chat
30 |         self.document_holders = document_holders
31 |         self.bot_name = bot_name
32 |         self.header = header
33 |         self.promotioner = promotioner
34 |         self.cmd = cmd
35 |         self.has_next = has_next
36 |         self.session_id = session_id
37 |         self.message_id = message_id
38 |         self.request_id = request_id
39 |         self.page = page
40 |         self.page_size = page_size
41 | 
42 |     async def render(self) -> tuple[str, Optional[list]]:
43 |         if not len(self.document_holders):
44 |             return t('COULD_NOT_FIND_ANYTHING', self.chat['language']), [close_button(self.session_id)]
45 | 
46 |         serp_elements = []
47 |         for position, document_holder in enumerate(self.document_holders):
48 |             serp_elements.append(
49 |                 document_holder
50 |                 .view_builder(self.chat['language'])
51 |                 .add_short_description()
52 |                 .add_new_line()
53 |                 .add_links()
54 |                 .build()
55 |             )
56 | 
57 |         serp = '\n\n'.join(serp_elements)
58 | 
59 |         if self.header:
60 |             serp = f'**{self.header}**\n\n{serp}'
61 | 
62 |         promotion_language = self.chat['language']
63 |         promo = self.promotioner.choose_promotion(promotion_language)
64 |         serp = f'{serp}\n\n{promo}\n'
65 | 
66 |         buttons = []
67 |         if self.cmd and self.message_id and self.session_id and (self.has_next or self.page > 0):
68 |             buttons = [
69 |                 Button.inline(
70 |                     text='<<1' if self.page > 1 else ' ',
71 |                     data=f'/{self.cmd}_{self.session_id}_{self.message_id}_0'
72 |                     if self.page > 1 else '/noop',
73 |                 ),
74 |                 Button.inline(
75 |                     text=f'<{self.page}' if self.page > 0 else ' ',
76 |                     data=f'/{self.cmd}_{self.session_id}_{self.message_id}_{self.page - 1}'
77 |                     if self.page > 0 else '/noop',
78 |                 ),
79 |                 Button.inline(
80 |                     text=f'{self.page + 2}>' if self.has_next else ' ',
81 |                     data=f'/{self.cmd}_{self.session_id}_{self.message_id}_{self.page + 1}'
82 |                     if self.has_next else '/noop',
83 |                 )
84 |             ]
85 |         buttons.append(close_button(self.session_id))
86 |         return serp, buttons
87 | 


--------------------------------------------------------------------------------
/web/src/utils.ts:
--------------------------------------------------------------------------------
  1 | import { utils } from 'summa-wasm'
  2 | 
  3 | // Downloads the bundled placeholder cover and exposes it as an object URL.
  4 | async function get_default_cover() {
  5 |   const response = await fetch('./default-cover.jpg')
  6 |   const image = await response.blob()
  7 |   return URL.createObjectURL(image);
  8 | }
  9 | // Resolved once, at module load, through top-level await.
 10 | export const default_cover = await get_default_cover()
  9 | 
 10 | /**
 11 |  * Formats a byte count with a binary (1024-based) unit, e.g. 1536 -> "1.5 KB".
 12 |  *
 13 |  * @param bytes byte count; 0, NaN and other falsy numerics give "0 Bytes"
 14 |  * @param decimals maximum number of decimals (negative values are treated as 0)
 15 |  */
 16 | export function format_bytes (bytes: number, decimals = 2) {
 17 |   if (!+bytes) return '0 Bytes'
 18 | 
 19 |   const k = 1024
 20 |   const dm = decimals < 0 ? 0 : decimals
 21 |   const sizes = ['Bytes', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB']
 22 | 
 23 |   // Use the magnitude so negative values get a real exponent, and clamp the
 24 |   // exponent so values below 1 or beyond YB still map onto an existing unit
 25 |   const exponent = Math.floor(Math.log(Math.abs(bytes)) / Math.log(k))
 26 |   const i = Math.min(sizes.length - 1, Math.max(0, exponent))
 27 | 
 28 |   return `${parseFloat((bytes / Math.pow(k, i)).toFixed(dm))} ${sizes[i]}`
 29 | }
 21 | 
 22 | /**
 23 |  * Formats a unix timestamp given in seconds as `YYYY-MM-DD`, using the
 24 |  * local time zone of the runtime.
 25 |  */
 26 | export function format_date (unixtime: bigint): string {
 27 |   const seconds = Number.parseInt(unixtime.toString())
 28 |   const date = new Date(seconds * 1000)
 29 |   // getMonth() is zero-based
 30 |   const month = (date.getMonth() + 1).toString().padStart(2, '0')
 31 |   const day = date.getDate().toString().padStart(2, '0')
 32 |   return [date.getFullYear(), month, day].join('-')
 33 | }
 34 | 
 35 | // Renders a fraction as a percentage with two decimals, e.g. 0.5 -> "50.00%".
 36 | export function format_percent (v: number): string {
 37 |   const percent = v * 100
 38 |   return `${percent.toFixed(2)}%`
 39 | }
 38 | // Resolves after roughly `ms` milliseconds.
 39 | export const sleep = async (ms: number) => {
 40 |   return await new Promise((resolve) => setTimeout(resolve, ms))
 41 | }
 39 | 
 40 | /**
 41 |  * Derives a filename stem from a title: lower-cases it and replaces every run
 42 |  * of characters that are neither letters nor digits with a single '-'.
 43 |  * An empty title becomes "unnamed".
 44 |  */
 45 | export function generate_filename (title: string) {
 46 |   const lowered = (title || "unnamed").toLowerCase()
 47 |   // One pass is enough: each maximal run of non-alphanumerics maps to one dash
 48 |   return lowered.replace(/[^\p{L}\p{N}]+/gu, '-')
 49 | }
 49 | 
 50 | /**
 51 |  * Tells whether `s` is a decimal integer literal: optional sign, digits only,
 52 |  * surrounding whitespace allowed. The previous `parseFloat` check also
 53 |  * accepted fractions ("1.5") and strings with trailing garbage ("12abc").
 54 |  */
 55 | export function is_int (s: string) {
 56 |   return /^\s*[+-]?\d+\s*$/.test(s)
 57 | }
 53 | 
 54 | // Arithmetic mean of `arr`; undefined for an empty array.
 55 | export function average (arr: number[]) {
 56 |   const count = arr.length
 57 |   if (count === 0) {
 58 |     return undefined
 59 |   }
 60 |   let sum = 0
 61 |   for (const value of arr) {
 62 |     sum += value
 63 |   }
 64 |   return sum / count
 65 | }
 64 | 
 65 | // Decodes HTML entities by round-tripping the text through a detached <textarea>.
 66 | export function decode_html(html) {
 67 |     const scratch = document.createElement('textarea');
 68 |     scratch.innerHTML = html;
 69 |     const decoded = scratch.value;
 70 |     return decoded;
 71 | }
 70 | 
 71 | // Parses `html` (empty string when falsy) and returns the text content of its <body>.
 72 | export function extract_text_from_html(html) {
 73 |     const parsed = new DOMParser().parseFromString(html || "", "text/html");
 74 |     const [body] = parsed.getElementsByTagName("body");
 75 |     return body.textContent;
 76 | }
 76 | 
 77 | /**
 78 |  * Removes HTML-escaped tags (`&lt;b&gt;`, `&lt;/b&gt;`, ...) that have no partner.
 79 |  *
 80 |  * A closing tag is kept only when it matches the most recent still-open tag
 81 |  * (names compared case-insensitively); opening tags that are still open at the
 82 |  * end are removed. Tags are removed by position, so a properly paired tag is
 83 |  * never dropped just because an unpaired tag elsewhere shares its name.
 84 |  * Self-closing tags (`&lt;br/&gt;`) are left untouched.
 85 |  */
 86 | export function remove_unpaired_escaped_tags(str) {
 87 |     const tag_pattern = /&lt;(\/?)([a-z][a-z0-9]*)\b[^&]*&gt;/gi;
 88 |     const open_tags = [];   // stack of {start, end, name} for unclosed opening tags
 89 |     const unpaired = [];    // spans of `str` that must be dropped
 90 | 
 91 |     for (const match of str.matchAll(tag_pattern)) {
 92 |         const start = match.index;
 93 |         const end = start + match[0].length;
 94 |         const name = match[2].toLowerCase();
 95 |         if (match[1] === '/') {
 96 |             const last_open = open_tags[open_tags.length - 1];
 97 |             if (last_open !== undefined && last_open.name === name) {
 98 |                 open_tags.pop();
 99 |             } else {
100 |                 unpaired.push({start, end});
101 |             }
102 |         } else if (!match[0].endsWith('/&gt;')) {
103 |             open_tags.push({start, end, name});
104 |         }
105 |     }
106 | 
107 |     // Whatever is still open never found its closing tag
108 |     unpaired.push(...open_tags);
109 |     unpaired.sort((a, b) => a.start - b.start);
110 | 
111 |     // Rebuild the string, skipping the unpaired spans
112 |     let result = '';
113 |     let cursor = 0;
114 |     for (const span of unpaired) {
115 |         result += str.slice(cursor, span.start);
116 |         cursor = span.end;
117 |     }
118 |     return result + str.slice(cursor);
119 | }
103 | 


--------------------------------------------------------------------------------
/cybrex/examples/analyse-references.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {
  6 |     "collapsed": false
  7 |    },
  8 |    "source": [
  9 |     "# Analyse References of Paper to Find an Answer\n",
 10 |     "\n",
 11 |     "The following example shows how to extract the references of a document and run a semantic search over the document and all its references.\n",
 12 |     "\n",
 13 |     "Optionally, start a Summa server to improve the performance of queries to STC."
 14 |    ]
 15 |   },
 16 |   {
 17 |    "cell_type": "code",
 18 |    "execution_count": null,
 19 |    "metadata": {
 20 |     "collapsed": false
 21 |    },
 22 |    "outputs": [],
 23 |    "source": [
 24 |     "import logging\n",
 25 |     "import sys\n",
 26 |     "\n",
 27 |     "from stc_geck.advices import BaseDocumentHolder\n",
 28 |     "from cybrex.cybrex_ai import CybrexAI\n",
 29 |     "\n",
 30 |     "logging.basicConfig(stream=sys.stdout, level=logging.INFO)\n",
 31 |     "\n",
 32 |     "cybrex = CybrexAI()\n",
 33 |     "query = 'What drugs are used for haemophilia treatment?'\n",
 34 |     "seed_doi = '10.1111/j.1365-2516.2007.01474.x'\n",
 35 |     "await cybrex.start()"
 36 |    ]
 37 |   },
 38 |   {
 39 |    "cell_type": "code",
 40 |    "execution_count": null,
 41 |    "outputs": [],
 42 |    "source": [
 43 |     "# Look up the seed document by its DOI\n",
 44 |     "seed_document = await cybrex.search_documents(f'id.dois:{seed_doi}', n_documents=1)\n",
 45 |     "seed_document = seed_document[0].document\n",
 46 |     "\n",
 47 |     "# Track the identifiers of all related documents for later retrieval\n",
 48 |     "related_document_ids = {f'id.dois:{seed_doi}'}\n",
 49 |     "\n",
 50 |     "# Outgoing references: the entries of the seed document's reference list\n",
 51 |     "for reference in seed_document['references']:\n",
 52 |     "    related_document_ids.add(f'id.dois:{reference[\"doi\"]}')\n",
 53 |     "\n",
 54 |     "# Incoming references: documents found by the `rd:` query for the seed DOI\n",
 55 |     "referencing_documents = await cybrex.search_documents(f'rd:{seed_doi}', n_documents=100)\n",
 56 |     "for referencing_document in referencing_documents:\n",
 57 |     "    referencing_document_holder = BaseDocumentHolder(referencing_document)\n",
 58 |     "    related_document_ids.add(referencing_document_holder.get_internal_id())\n",
 59 |     "\n",
 60 |     "print('Following documents will be queries:', related_document_ids)\n",
 61 |     "\n",
 62 |     "# Now retrieve all related documents and their metadata\n",
 63 |     "related_documents = await cybrex.search_documents(' '.join(related_document_ids), n_documents=100)"
 64 |    ],
 65 |    "metadata": {
 66 |     "collapsed": false
 67 |    }
 68 |   },
 69 |   {
 70 |    "cell_type": "code",
 71 |    "execution_count": null,
 72 |    "outputs": [],
 73 |    "source": [
 74 |     "related_chunks = await cybrex.semantic_search_in_documents(query, related_documents, n_chunks=10, minimum_score=0.5, skip_downloading_pdf=False)"
 75 |    ],
 76 |    "metadata": {
 77 |     "collapsed": false
 78 |    }
 79 |   },
 80 |   {
 81 |    "cell_type": "code",
 82 |    "execution_count": null,
 83 |    "outputs": [],
 84 |    "source": [
 85 |     "related_chunks[:10]"
 86 |    ],
 87 |    "metadata": {
 88 |     "collapsed": false
 89 |    }
 90 |   }
 91 |  ],
 92 |  "metadata": {
 93 |   "kernelspec": {
 94 |    "display_name": "Python 3",
 95 |    "language": "python",
 96 |    "name": "python3"
 97 |   },
 98 |   "language_info": {
 99 |    "codemirror_mode": {
100 |     "name": "ipython",
101 |     "version": 3
102 |    },
103 |    "file_extension": ".py",
104 |    "mimetype": "text/x-python",
105 |    "name": "python",
106 |    "nbconvert_exporter": "python",
107 |    "pygments_lexer": "ipython3",
108 |    "version": "3.11.3"
109 |   }
110 |  },
111 |  "nbformat": 4,
112 |  "nbformat_minor": 0
113 | }
114 | 


--------------------------------------------------------------------------------
/web/src/scss/styles.scss:
--------------------------------------------------------------------------------
  1 | $base-light: #644494;
  2 | $base-dark: #FE9609;
  3 | $enable-negative-margins: true;
  4 | 
  5 | $headings-color: $base-light;
  6 | $headings-color-dark: $base-dark;
  7 | $link-color: $base-light;
  8 | $link-color-dark: $base-dark;
  9 | $code-color: $base-light;
 10 | $code-color-dark: $base-dark;
 11 | $badge-font-size: 0.85em;
 12 | 
 13 | @import "bootstrap/scss/bootstrap";
 14 | @import "bootstrap-icons/font/bootstrap-icons.css";
 15 | 
 16 | .inversion-filter {}
 17 | @media (prefers-color-scheme: dark) {
 18 |   .inversion-filter {
 19 |      filter: invert(100%) hue-rotate(180deg);
 20 |   }
 21 | }
 22 | 
 23 | .favicon-inversion-filter {
 24 |   filter: invert(29%) sepia(15%) saturate(2317%) hue-rotate(222deg) brightness(100%) contrast(90%)
 25 | }
 26 | @media (prefers-color-scheme: dark) {
 27 |   .favicon-inversion-filter {
 28 |      filter: invert(78%) sepia(78%) saturate(4236%) hue-rotate(356deg) brightness(103%) contrast(99%);
 29 |   }
 30 | }
 31 | 
 32 | .content-view {
 33 |   word-break: break-word !important;
 34 | }
 35 | 
 36 | .content-view header { @extend h5; }
 37 | .content-view section header { @extend b; }
 38 | 
 39 | .content-view .infobox {
 40 |   font-family: sans-serif;
 41 |   max-width: 320px;
 42 |   float: right !important;
 43 |   clear: right !important;
 44 |   background-color: rgb(44, 50, 56) !important;
 45 |   border: 1px solid #a2a9b1 !important;
 46 |   border-spacing: 3px !important;
 47 |   margin: 0.5em 0 0.5em 1em;
 48 |   padding: 0.2em;
 49 |   line-height: 1.5em;
 50 |   width: 100%;
 51 |   @extend small;
 52 |   @extend .m-2;
 53 | }
 54 | 
 55 | .content-view .wikitable {
 56 |   margin: 1em 0;
 57 |   border: 1px solid #a2a9b1;
 58 |   border-collapse: collapse;
 59 | }
 60 | 
 61 | // Every cell selector is scoped to .content-view so the rules cannot leak onto other tables.
 62 | .content-view .wikitable > tr > th, .content-view .wikitable > * > tr > th {
 63 |     text-align: center;
 64 | }
 65 | .content-view .wikitable > tr > th, .content-view .wikitable > tr > td, .content-view .wikitable > * > tr > th, .content-view .wikitable > * > tr > td {
 66 |     border: 1px solid #a2a9b1;
 67 |     padding: 0.2em 0.4em;
 68 | }
 69 | 
 70 | .content-view .infobox > tbody {
 71 |   font-family: sans-serif;
 72 |   border-spacing: 3px;
 73 |   line-height: 1.5em;
 74 |   display: flex !important;
 75 |   flex-flow: column nowrap !important;
 76 |   @extend .m-2;
 77 | }
 78 | 
 79 | 
 80 | .content-view .infobox > tbody table {
 81 |   width: 100%;
 82 |   min-width: 100%;
 83 |   border-collapse: collapse;
 84 |   display: inline-table;
 85 | }
 86 | 
 87 | .content-view .reflist {
 88 |   @extend .small;
 89 | }
 90 | .content-view .refbegin {
 91 |   @extend .small;
 92 | }
 93 | 
 94 | .content-view details > summary {
 95 |   list-style: none;
 96 | }
 97 | .content-view details > summary::-webkit-details-marker {
 98 |   display: none;
 99 | }
100 | 
101 | .content-view .tright {
102 |   float: right !important;
103 |   clear: right !important;
104 |   margin: 14px 0 14px 14px;
105 | }
106 | 
107 | .content-view .tleft {
108 |   float: left !important;
109 |   clear: left !important;
110 |   margin: 14px 14px 14px 0;
111 | }
112 | 
113 | .content-view .thumb {
114 |   max-width: 704px;
115 | 
116 |   box-sizing: content-box;
117 |   display: block !important;
118 |   position: static;
119 |   text-align: center !important;
120 | }
121 | 
122 | .content-view .thumb .thumbinner {
123 |   display: flex !important;
124 |   flex-direction: column;
125 | }
126 | 
127 | .content-view .thumbcaption {
128 |   margin: 0.5em 0 0;
129 |   padding: 0 !important;
130 |   width: auto !important;
131 |   font-size: 0.8em;
132 |   line-height: 1.5;
133 |   @extend small;
134 | }
135 | 
136 | .content-view .trow {
137 |   display: flex !important;
138 | }
139 | 
140 | .content-view .hatnote {
141 |   @extend small;
142 |   @extend .m-2;
143 | }
144 | 
145 | 


--------------------------------------------------------------------------------
/cybrex/cybrex/data_source/geck_data_source.py:
--------------------------------------------------------------------------------
 1 | from typing import (
 2 |     List,
 3 |     Optional,
 4 | )
 5 | 
 6 | import orjson
 7 | from stc_geck.advices import BaseDocumentHolder
 8 | from stc_geck.client import StcGeck
 9 | 
10 | from .base import (
11 |     BaseDataSource,
12 |     SourceDocument,
13 | )
14 | 
15 | 
16 | class GeckDataSource(BaseDataSource):
17 |     def __init__(self, geck: StcGeck):
18 |         """Keep the ``StcGeck`` client that the data source queries documents through."""
19 |         self.geck = geck
19 | 
20 |     def _query_function(
21 |         self,
22 |         query: str = '',
23 |         with_language_filter: bool = True,
24 |         with_type_filter: bool = True,
25 |         with_existence_filter: bool = False,
26 |     ):
27 |         subqueries = []
28 |         if with_type_filter:
29 |             subqueries.append({'occur': 'must', 'query': {'boolean': {'subqueries': [
30 |                 {'occur': 'should', 'query': {'term': {'field': 'type', 'value': 'book'}}},
31 |                 {'occur': 'should', 'query': {'term': {'field': 'type', 'value': 'edited-book'}}},
32 |                 {'occur': 'should', 'query': {'term': {'field': 'type', 'value': 'monograph'}}},
33 |                 {'occur': 'should', 'query': {'term': {'field': 'type', 'value': 'reference-book'}}},
34 |                 {'occur': 'should', 'query': {'term': {'field': 'type', 'value': 'journal-article'}}},
35 |                 {'occur': 'should', 'query': {'term': {'field': 'type', 'value': 'wiki'}}},
36 |             ]}}})
37 |         if with_language_filter:
38 |             subqueries.append({'occur': 'must', 'query': {'term': {'field': 'languages', 'value': 'en'}}})
39 |         if with_existence_filter:
40 |             subqueries.append({'occur': 'must', 'query': {'exists': {'field': 'content'}}})
41 |         if query:
42 |             subqueries.append({'occur': 'must', 'query': {'match': {'value': query.lower()}}})
43 |         if subqueries:
44 |             return {'boolean': {'subqueries': subqueries}}
45 |         else:
46 |             return {'all': {}}
47 | 
48 |     async def stream_documents(
49 |         self,
50 |         query: str,
51 |         limit: int = 0,
52 |     ) -> List[SourceDocument]:
53 |         documents = self.geck.get_summa_client().documents(
54 |             self.geck.index_alias,
55 |             query_filter=self._query_function(query, with_existence_filter=True),
56 |         )
57 |         counter = 0
58 |         async for document in documents:
59 |             document = orjson.loads(document)
60 |             document_holder = BaseDocumentHolder(document)
61 |             document_id = document_holder.get_internal_id()
62 |             if not document_id:
63 |                 continue
64 |             yield SourceDocument(
65 |                 document=document,
66 |                 document_id=document_id,
67 |             )
68 |             counter += 1
69 |             if limit and counter >= limit:
70 |                 return
71 | 
72 |     async def search_documents(
73 |         self,
74 |         query: str,
75 |         limit: int = 5,
76 |         sources: Optional[List[str]] = None
77 |     ) -> List[SourceDocument]:
78 |         documents = await self.geck.get_summa_client().search_documents({
79 |             'index_alias': self.geck.index_alias,
80 |             'query': self._query_function(query),
81 |             'collectors': [{'top_docs': {'limit': limit}}],
82 |             'is_fieldnorms_scoring_enabled': False,
83 |         })
84 |         source_documents = []
85 |         for document in documents:
86 |             document_holder = BaseDocumentHolder(document)
87 |             document_id = document_holder.get_internal_id()
88 |             if not document_id:
89 |                 continue
90 |             source_documents.append(SourceDocument(
91 |                 document=document,
92 |                 document_id=document_id,
93 |             ))
94 |         return source_documents
95 | 


--------------------------------------------------------------------------------
/web/src/components/DocumentButtons.vue:
--------------------------------------------------------------------------------
 1 | <template lang="pug">
 2 | div
 3 |   .btn-group.btn-group-sm(v-if="http_links.get_best_file_links_groups() !== undefined")
 4 |     button.btn.btn-secondary(@click.stop.prevent="launch_reader")
 5 |       i.bi.bi-book
 6 |   .btn-group.btn-group-sm.ms-2(v-if="http_links.first_file_links_group_first_external_link()")
 7 |     button.btn.btn-secondary(data-bs-toggle="modal" data-bs-target="#qr-modal")
 8 |       i.bi.bi-qr-code-scan
 9 |   .btn-group.btn-group-sm.ms-2
10 |     button.btn.btn-secondary(v-if="!bookmark" @click.stop.prevent="add_bookmark")
11 |       i.bi.bi-bookmark
12 |     button.btn.btn-secondary(v-else @click.stop.prevent="remove_bookmark")
13 |       i.bi.bi-bookmark-check-fill
14 |   span
15 |     .btn-group.btn-group-sm.dropup(v-if="http_links.file_links_groups.length > 0").ms-2
16 |       a.btn.btn-secondary(type="button" :href="http_links.first_file_links_group().first_link().url + '&download=true'" target="_blank")
17 |         i.bi.bi-cloud-download-fill.me-2.ms-2 &nbsp; {{ http_links.first_file_links_group().label }}
18 |       button.btn.btn-secondary.dropdown-toggle.dropdown-toggle-split.ms-1(v-if="http_links.file_links_groups.length > 1" type="button", data-bs-toggle="dropdown" aria-expanded="false")
19 |         span.visually-hidden.ms-1.me-1 Toggle Dropdown
20 |       ul.dropdown-menu(v-if="http_links.file_links_groups.length > 1")
21 |         li(v-for="(file_links_group, index) in http_links.file_links_groups")
22 |           a.dropdown-item(:href="file_links_group.first_link().url + '&download=true'" target="_blank") {{ file_links_group.label }}
23 |   .modal.fade(v-if="http_links.first_file_links_group_first_external_link()" id="qr-modal" tabindex="-1" aria-labelledby="exampleModalLabel" aria-hidden="true")
24 |     .modal-dialog
25 |       .modal-content
26 |         .modal-header
27 |           h5.modal-title IPFS Link
28 |           button.btn-close(type="button" data-bs-dismiss="modal" aria-label="Close")
29 |         div.modal-body
30 |           qr-code(:url="http_links.first_file_links_group_first_external_link().url")
31 | 
32 | </template>
33 | 
34 | <script lang="ts">
35 | // @ts-nocheck
36 | 
37 | import { useObservable } from '@vueuse/rxjs'
38 | import { liveQuery } from 'dexie'
39 | import { defineComponent, type PropType } from 'vue'
40 | 
41 | import { Bookmark, user_db } from '@/database'
42 | 
43 | import QrCode from './QrCode.vue'
44 | import {format_bytes} from "@/utils";
45 | import {HttpLinks} from "@/components/BaseDocument.vue";
46 | 
export default defineComponent({
  name: 'DocumentButtons',
  components: { QrCode },
  props: {
    // Query string stored with / looked up for the bookmark.
    query: {
      type: String,
      required: true
    },
    // Link collection rendered by the template buttons.
    http_links: {
      type: HttpLinks
    }
  },
  data () {
    // Live Dexie query for the bookmark matching the current query.
    const bookmark_lookup = liveQuery(async () => {
      return await user_db.bookmarks.get({
        index_name: 'nexus_science',
        query: this.query
      })
    })
    return {
      bookmark: useObservable(bookmark_lookup)
    }
  },
  methods: {
    async add_bookmark () {
      const bookmark = new Bookmark("nexus_science", this.query)
      await user_db.add_bookmark(bookmark)
    },
    async remove_bookmark () {
      await user_db.delete_bookmark("nexus_science", this.query)
    },
    // Open the reader route for the best link; only pdf/epub/djvu files
    // are handed over, anything else is ignored.
    async launch_reader () {
      const best_link = this.http_links.get_best_file_links_groups()
      if (!best_link) {
        return
      }
      const readable_suffixes = ["pdf", "epub", "djvu"]
      const is_readable = readable_suffixes.some((suffix) => best_link.filename.endsWith(suffix))
      if (!is_readable) {
        return
      }
      this.$router.push({
        name: 'reader',
        query: {
          cid: best_link.cid,
          filename: best_link.filename,
        }
      })
    }
  }
})
96 | </script>
97 | 


--------------------------------------------------------------------------------
/cybrex/cybrex/chains/map_reduce.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | from typing import (
  3 |     Iterable,
  4 |     List,
  5 | )
  6 | 
  7 | from ..document_chunker import Chunk
  8 | from ..llm_manager import LLMManager
  9 | 
 10 | 
 11 | class MapReduceChain:
 12 |     def __init__(self, llm_manager: LLMManager, chunk_accumulator):
 13 |         self.llm_manager = llm_manager
 14 |         self.chunk_accumulator = chunk_accumulator
 15 | 
 16 |     def input_splitter(self, chunks: List[Chunk]) -> str:
 17 |         for chunk in chunks:
 18 |             self.chunk_accumulator.accept(chunk)
 19 |             if self.chunk_accumulator.is_full():
 20 |                 yield self.chunk_accumulator.produce()
 21 |         if not self.chunk_accumulator.is_empty():
 22 |             yield self.chunk_accumulator.produce()
 23 | 
 24 |     def output_processor(self, llm_output: str) -> Chunk:
 25 |         return Chunk(
 26 |             title=None,
 27 |             document_id=None,
 28 |             chunk_id=None,
 29 |             text=llm_output,
 30 |             length=len(llm_output)
 31 |         )
 32 | 
 33 |     def process(self, chunks: Iterable[Chunk]):
 34 |         while True:
 35 |             input_chunks = self.input_splitter(chunks)
 36 |             outputs = []
 37 |             for input_chunk in input_chunks:
 38 |                 llm_output = self.llm_manager.process(input_chunk)
 39 |                 logging.getLogger('statbox').info({
 40 |                     'action': 'intermediate_map_reduce_step',
 41 |                     'output': llm_output,
 42 |                 })
 43 |                 outputs.append(llm_output)
 44 |             if len(outputs) == 1:
 45 |                 return outputs[0].strip()
 46 |             chunks = list(map(self.output_processor, outputs))
 47 | 
 48 | 
 49 | class ChunkAccumulator:
 50 |     def __init__(self, prompter, max_chunk_length: int):
 51 |         self.prompter = prompter
 52 |         self.max_chunk_length = max_chunk_length
 53 |         self.chunks = []
 54 |         self.current_chunk_length = 0
 55 | 
 56 |     def accept(self, chunk: Chunk):
 57 |         self.current_chunk_length += len(chunk.text)
 58 |         self.chunks.append(chunk)
 59 | 
 60 |     def is_full(self):
 61 |         return self.current_chunk_length >= self.max_chunk_length
 62 | 
 63 |     def is_empty(self):
 64 |         return len(self.chunks) == 0
 65 | 
 66 | 
 67 | class QAChunkAccumulator(ChunkAccumulator):
 68 |     def __init__(self, query: str, prompter, max_chunk_length: int):
 69 |         super().__init__(prompter=prompter, max_chunk_length=max_chunk_length)
 70 |         self.query = query
 71 | 
 72 |     def produce(self):
 73 |         collected_chunks = self.chunks
 74 |         self.chunks = []
 75 |         self.current_chunk_length = 0
 76 |         return self.prompter.qa_prompt(self.query, collected_chunks)
 77 | 
 78 | 
 79 | class SummarizeChunkAccumulator(ChunkAccumulator):
 80 |     def produce(self):
 81 |         collected_chunks = self.chunks
 82 |         self.chunks = []
 83 |         self.current_chunk_length = 0
 84 |         return self.prompter.summarize_prompt(collected_chunks)
 85 | 
 86 | 
 87 | class QAChain(MapReduceChain):
 88 |     def __init__(self, query: str, llm_manager):
 89 |         super().__init__(
 90 |             llm_manager=llm_manager,
 91 |             chunk_accumulator=QAChunkAccumulator(
 92 |                 query=query,
 93 |                 prompter=llm_manager.prompter,
 94 |                 max_chunk_length=llm_manager.max_prompt_chars,
 95 |             ))
 96 | 
 97 | 
 98 | class SummarizeChain(MapReduceChain):
 99 |     def __init__(self, llm_manager: LLMManager):
100 |         super().__init__(
101 |             llm_manager=llm_manager,
102 |             chunk_accumulator=SummarizeChunkAccumulator(
103 |                 prompter=llm_manager.prompter,
104 |                 max_chunk_length=llm_manager.max_prompt_chars,
105 |             )
106 |         )
107 | 


--------------------------------------------------------------------------------
/tgbot/views/telegram/common.py:
--------------------------------------------------------------------------------
  1 | import base64
  2 | import binascii
  3 | import logging
  4 | 
  5 | import base36
  6 | from izihawa_utils.exceptions import BaseError
  7 | from telethon import Button
  8 | 
  9 | from tgbot.translations import t
 10 | 
 11 | 
class TooLongQueryError(BaseError):
    # Raised by encode_query_to_deep_link when the query exceeds 64 characters.
    # `level` / `code` are consumed by BaseError - presumably for logging; verify there.
    level = logging.WARNING
    code = 'too_long_query_error'
 15 | 
 16 | 
class DecodeDeepQueryError(BaseError):
    # Raised when a deep-link query cannot be base64-decoded
    # (see decode_deep_query and recode_base64_to_base36).
    level = logging.WARNING
    code = 'decode_deep_query_error'
 20 | 
 21 | 
 22 | def vote_button(language: str, case: str):
 23 |     label = f"REPORT_{case.upper()}_FILE"
 24 |     case = {'correct': 'c', 'incorrect': 'i'}[case]
 25 |     return Button.inline(
 26 |         text=t(label, language),
 27 |         data=f'/vote_{case}',
 28 |     )
 29 | 
 30 | 
 31 | def encode_query_to_deep_link(query, bot_name, skip_encoding=False):
 32 |     if not skip_encoding:
 33 |         query = encode_deep_query(query)
 34 |     if len(query) <= 64:
 35 |         return f'https://t.me/{bot_name}?start={query}'
 36 |     raise TooLongQueryError()
 37 | 
 38 | 
 39 | def to_bytes(n):
 40 |     return [n & 255] + to_bytes(n >> 8) if n > 0 else []
 41 | 
 42 | 
 43 | def recode_base36_to_base64(query):
 44 |     return base64.b64encode(bytearray(to_bytes(base36.loads(query))), altchars=b'-_').rstrip(b'=')
 45 | 
 46 | 
 47 | def recode_base64_to_base36(query):
 48 |     try:
 49 |         # Padding fix
 50 |         return base36.dumps(int.from_bytes(base64.b64decode(query + "=" * ((4 - len(query) % 4) % 4), altchars=b'-_'), 'little'))
 51 |     except (binascii.Error, ValueError, UnicodeDecodeError) as e:
 52 |         raise DecodeDeepQueryError(nested_error=e)
 53 | 
 54 | 
 55 | def encode_deep_query(query):
 56 |     return base64.b64encode(query.encode(), altchars=b'-_').decode()
 57 | 
 58 | 
 59 | def decode_deep_query(query):
 60 |     try:
 61 |         # Padding fix
 62 |         return base64.b64decode(query + "=" * ((4 - len(query) % 4) % 4), altchars=b'-_').decode()
 63 |     except (binascii.Error, ValueError, UnicodeDecodeError) as e:
 64 |         raise DecodeDeepQueryError(nested_error=e)
 65 | 
 66 | 
 67 | async def remove_button(event, mark, and_empty_too=False, link_preview=None):
 68 |     original_message = await event.get_message()
 69 |     if original_message:
 70 |         original_buttons = original_message.buttons
 71 |         buttons = []
 72 |         for original_line in original_buttons:
 73 |             line = []
 74 |             for original_button in original_line:
 75 |                 if mark in original_button.text or (and_empty_too and not original_button.text.strip()):
 76 |                     continue
 77 |                 line.append(original_button)
 78 |             if line:
 79 |                 buttons.append(line)
 80 |         await event.edit(original_message.text, buttons=buttons, link_preview=link_preview)
 81 | 
 82 | 
 83 | def get_formatted_filesize(filesize) -> str:
 84 |     if filesize:
 85 |         filesize = max(1024, filesize)
 86 |         return '{:.1f}Mb'.format(float(filesize) / (1024 * 1024))
 87 |     else:
 88 |         return ''
 89 | 
 90 | 
 91 | def encode_link(bot_name, text, query) -> str:
 92 |     try:
 93 |         encoded_query = encode_query_to_deep_link(query, bot_name)
 94 |         if text:
 95 |             return f'[{text}]({encoded_query})'
 96 |         else:
 97 |             return encoded_query
 98 |     except TooLongQueryError:
 99 |         return text
100 | 
101 | 
def fix_markdown(text: str):
    """Remove the last ``**`` / ``__`` marker when it has no partner.

    For each marker with an odd number of occurrences the final
    occurrence is deleted; ``**`` is handled before ``__``.
    """
    for marker in ('**', '__'):
        if text.count(marker) % 2 == 1:
            before, _, after = text.rpartition(marker)
            text = before + after
    return text
110 | 
111 | 
def add_expand_dot(text, le: int):
    """Crop ``text`` to at most ``le`` characters and append ``'...'``.

    Text shorter than ``le`` is returned unchanged. Otherwise the text is
    cut at the last space found within its first ``le`` characters. When
    that prefix contains no space the text is cut at ``le`` exactly;
    previously the "not found" result of ``rfind`` (-1) was used as the
    cut position, which only removed the very last character.
    """
    if len(text) < le:
        return text
    crop_at = text.rfind(' ', 0, le)
    if crop_at == -1:
        crop_at = le
    return text[:crop_at] + '...'
117 | 


--------------------------------------------------------------------------------
/web/src/views/BookmarksView.vue:
--------------------------------------------------------------------------------
  1 | <template lang="pug">
  2 | .container.col-md-7.offset-md-2
  3 |   loading-spinner(v-if="is_loading" style="margin-top: 140px" :label="get_label('loading') + '...'")
  4 |   div(v-else-if="loading_failure_reason !== undefined")
  5 |     connectivity-issues-view(:reason="loading_failure_reason")
  6 |   div(v-else)
  7 |     .d-flex
  8 |       i.ms-3.me-auto {{ bookmarks.length }} {{ get_label('bookmarks') }}
  9 |       a.text-secondary(type="button" @click.stop.prevent="export_bookmarks") export
 10 |     div.mt-3.mb-3(v-if="!is_loading")
 11 |       search-list(:scored_documents='documents')
 12 |       nav(v-if="bookmarks.length > page_size")
 13 |         ul.pagination.justify-content-center
 14 |           li.page-item(v-if="page > 2" v-on:click="set_page(1);")
 15 |             a.page-link &lt;&lt;
 16 |           li.page-item(v-on:click="set_page(page - 1);")
 17 |             a.page-link &lt;
 18 |           li.page-item.disabled
 19 |             a.page-link {{ page }}
 20 |           li.page-item(v-if="bookmarks.length > page * page_size", v-on:click="set_page(page + 1);")
 21 |             a.page-link &gt;
 22 | </template>
 23 | 
 24 | <script lang="ts">
 25 | import { defineComponent } from 'vue'
 26 | import { RouterLink } from 'vue-router'
 27 | 
 28 | import ConnectivityIssuesView from '@/components/ConnectivityIssues.vue'
 29 | import LoadingSpinner from '@/components/LoadingSpinner.vue'
 30 | import SearchList from '@/components/SearchList.vue'
 31 | import { user_db } from '@/database'
 32 | 
 33 | export default defineComponent({
 34 |   name: 'SearchView',
 35 |   components: {
 36 |     ConnectivityIssuesView,
 37 |     LoadingSpinner,
 38 |     RouterLink,
 39 |     SearchList
 40 |   },
 41 |   data () {
 42 |     return {
 43 |       page: 1,
 44 |       is_loading: false,
 45 |       loading_failure_reason: undefined,
 46 |       bookmarks: [],
 47 |       documents: []
 48 |     }
 49 |   },
 50 |   async created () {
 51 |     try {
 52 |       document.title = 'Bookmarks - STC'
 53 |       this.is_loading = true
 54 |       this.bookmarks = await user_db.get_all_bookmarks()
 55 |       await this.submit()
 56 |     } catch (e) {
 57 |       console.error(e)
 58 |       this.loading_failure_reason = e
 59 |     } finally {
 60 |       this.is_loading = false
 61 |     }
 62 |   },
 63 |   methods: {
 64 |     async export_bookmarks () {
 65 |       const element = document.createElement('a')
 66 |       element.setAttribute(
 67 |         'href',
 68 |         'data:text/plain;charset=utf-8,' +
 69 |           encodeURIComponent(JSON.stringify(await user_db.get_all_bookmarks()))
 70 |       )
 71 |       element.setAttribute('download', 'stc-bookmarks.json')
 72 |       element.style.display = 'none'
 73 |       document.body.appendChild(element)
 74 |       element.click()
 75 |       document.body.removeChild(element)
 76 |     },
 77 |     async set_page (new_page: number) {
 78 |       if (new_page < 1) {
 79 |         new_page = 1
 80 |       } else {
 81 |         this.page = new_page
 82 |         await this.submit()
 83 |       }
 84 |     },
 85 |     async submit () {
 86 |       this.is_loading = true
 87 |       try {
 88 |         const new_documents = []
 89 |         const bookmarks = this.bookmarks.slice(
 90 |           (this.page - 1) * 5,
 91 |           this.page * 5
 92 |         )
 93 |         for (const load_bookmark of bookmarks) {
 94 |           const collector_outputs = await this.search_service.search(load_bookmark.query, {
 95 |             page: 1,
 96 |             index_name: load_bookmark.index_name
 97 |           })
 98 |           new_documents.push(
 99 |             collector_outputs[0].collector_output.documents.scored_documents[0]
100 |           )
101 |         }
102 |         this.documents = new_documents
103 |       } catch (e) {
104 |         console.error(e)
105 |         this.loading_failure_reason = e
106 |       } finally {
107 |         this.is_loading = false
108 |       }
109 |     }
110 |   }
111 | })
112 | </script>
113 | 


--------------------------------------------------------------------------------
/tgbot/handlers/submit.py:
--------------------------------------------------------------------------------
 1 | import asyncio
 2 | import re
 3 | from urllib.parse import unquote
 4 | 
 5 | from telethon import events
 6 | 
 7 | from library.telegram.base import RequestContext
 8 | from library.telegram.common import close_button
 9 | from library.textutils import DOI_REGEX
10 | from tgbot.app.exceptions import UnknownFileFormatError
11 | from tgbot.translations import t
12 | 
13 | from ..app.librarian_service import extract_internal_id
14 | from .base import BaseHandler
15 | 
16 | 
def is_submit_message(event):
    """Tell whether ``event`` carries a document that may be a submission.

    Accepted MIME types are ``application/octet-stream``,
    ``application/pdf`` and ``application/zip``.

    The former extra branch for forwarded messages re-tested the very same
    ``event.document.mime_type``, so it could never accept a message the
    first test had rejected. It could only raise ``AttributeError``
    (``event.document`` being ``None``, or the forward header lacking a
    ``document`` attribute), so it has been dropped.
    """
    document = event.document
    if not document:
        return False
    return document.mime_type in (
        'application/octet-stream', 'application/pdf', 'application/zip'
    )
25 | 
26 | 
class SubmitHandler(BaseHandler):
    """Handle incoming messages accepted by ``is_submit_message``."""
    filter = events.NewMessage(func=is_submit_message, incoming=True)
    is_group_handler = True
    writing_handler = True

    def get_internal_id_hint(self, message, reply_message) -> str:
        """Find an internal id for the submitted file.

        The message text is inspected first, then the replied-to message
        (if any). In each of them an explicit internal id wins over a DOI
        match, which is turned into ``id.dois:<prefix>/<suffix>``.
        Returns ``None`` when nothing is found.
        """
        if message.text:
            if internal_id := extract_internal_id(message.text):
                return internal_id
            if doi_match := re.search(DOI_REGEX, message.raw_text):
                return 'id.dois:' + doi_match.group(1) + '/' + doi_match.group(2)
        if reply_message:
            if internal_id := extract_internal_id(reply_message.text):
                return internal_id
            if doi_match := re.search(DOI_REGEX, reply_message.raw_text):
                return 'id.dois:' + doi_match.group(1) + '/' + doi_match.group(2)
        return None

    async def handler(self, event, request_context: RequestContext):
        """Process a submitted file.

        Replies with a hint request when no internal id can be derived.
        PDF files are passed to the librarian service (when configured) and
        the upload is recorded; any other format is reported as unknown and
        the submitting message is deleted.
        """
        session_id = self.generate_session_id()

        request_context.add_default_fields(session_id=session_id)
        request_context.statbox(action='show', mode='submit', mime_type=event.document.mime_type)

        reply_message = await event.get_reply_message()
        internal_id_hint = self.get_internal_id_hint(message=event, reply_message=reply_message)
        request_context.statbox(action='doi_hint', internal_id_hint=internal_id_hint)

        if not internal_id_hint:
            return await event.reply(
                t('NO_DOI_HINT', request_context.chat['language']),
                buttons=None if request_context.is_group_mode() else [close_button()],
            )
        field, value = internal_id_hint.split(':', 1)

        if event.document.mime_type != 'application/pdf':
            request_context.statbox(action='unknown_file_format')
            request_context.error_log(UnknownFileFormatError(format=event.document.mime_type))
            return await asyncio.gather(
                event.reply(
                    t('UNKNOWN_FILE_FORMAT_ERROR', request_context.chat['language']),
                    buttons=None if request_context.is_group_mode() else [close_button()],
                ),
                event.delete(),
            )

        if not self.application.librarian_service:
            return
        document = await self.application.summa_client.get_one_by_field_value(
            'nexus_science',
            field,
            value,
        )
        uploaded_message = await self.application.librarian_service.process_file(
            event,
            request_context,
            document,
        )
        await self.application.database.add_upload(event.sender_id, uploaded_message.id, internal_id_hint)
87 | 


--------------------------------------------------------------------------------
/library/textutils/utils.py:
--------------------------------------------------------------------------------
  1 | import re
  2 | import struct
  3 | 
  4 | import isbnlib
  5 | 
  6 | from . import (
  7 |     EMAIL_REGEX,
  8 |     HASHTAG_REGEX,
  9 |     MULTIWHITESPACE_REGEX,
 10 |     NON_ALNUMWHITESPACE_REGEX,
 11 |     TELEGRAM_LINK_REGEX,
 12 |     URL_REGEX,
 13 | )
 14 | 
 15 | 
 16 | def add_surrogate(text):
 17 |     return ''.join(
 18 |         # SMP -> Surrogate Pairs (Telegram offsets are calculated with these).
 19 |         # See https://en.wikipedia.org/wiki/Plane_(Unicode)#Overview for more.
 20 |         ''.join(chr(y) for y in struct.unpack('<HH', x.encode('utf-16le')))
 21 |         if (0x10000 <= ord(x) <= 0x10FFFF) else x for x in text
 22 |     )
 23 | 
 24 | 
 25 | def cast_string_to_single_string(s):
 26 |     processed = MULTIWHITESPACE_REGEX.sub(' ', NON_ALNUMWHITESPACE_REGEX.sub(' ', s))
 27 |     processed = processed.strip().replace(' ', '-')
 28 |     return processed
 29 | 
 30 | 
 31 | def despace(text):
 32 |     text = re.sub(r'\n+', '\n', text)
 33 |     text = re.sub(r'[ \t]+', ' ', text)
 34 |     text = re.sub(r'\n[ \t]+', '\n', text)
 35 |     return text
 36 | 
 37 | 
 38 | def despace_full(text):
 39 |     return re.sub(r'\s+', ' ', text).strip()
 40 | 
 41 | 
 42 | def despace_smart(text):
 43 |     text = re.sub(r'\n\s*[-•]+\s*', r'\n', text)
 44 |     text = re.sub(r'\n{2,}', '\n', text).strip()
 45 |     text = re.sub(r'\.?(\s+)?\n', r'. ', text)
 46 |     text = re.sub(r'\s+', ' ', text)
 47 |     return text
 48 | 
 49 | 
 50 | def escape_format(text, escape_font=True, escape_quote=True, escape_brackets=True):
 51 |     if isinstance(text, str):
 52 |         if escape_font:
 53 |             text = re.sub(r'([_*]){2,}', r'\g<1>', text)
 54 |         if escape_quote:
 55 |             text = text.replace("`", "'")
 56 |         if escape_brackets:
 57 |             text = text.replace('[', r'`[`').replace(']', r'`]`')
 58 |     elif isinstance(text, bytes):
 59 |         if escape_font:
 60 |             text = re.sub(br'([_*]){2,}', br'\g<1>', text)
 61 |         if escape_quote:
 62 |             text = text.replace(b"`", b"'")
 63 |         if escape_brackets:
 64 |             text = text.replace(b'[', br'`[`').replace(b']', br'`]`')
 65 |     return text
 66 | 
 67 | 
 68 | def remove_markdown(text):
 69 |     text = re.sub('[*_~]{2,}', '', text)
 70 |     text = re.sub('`+', '', text)
 71 |     text = re.sub(r'\[\s*(.*?)(\s*)\]\(.*?\)', r'\g<1>\g<2>', text, flags=re.MULTILINE)
 72 |     return text
 73 | 
 74 | 
 75 | def remove_emails(text):
 76 |     return re.sub(EMAIL_REGEX, '', text)
 77 | 
 78 | 
 79 | def remove_hashtags(text):
 80 |     return re.sub(HASHTAG_REGEX, '', text)
 81 | 
 82 | 
 83 | def remove_hidden_chars(text):
 84 |     return text.replace('\xad', '')
 85 | 
 86 | 
 87 | def remove_url(text):
 88 |     return re.sub(URL_REGEX, '', text)
 89 | 
 90 | 
 91 | def replace_telegram_link(text):
 92 |     return re.sub(TELEGRAM_LINK_REGEX, r'@\1', text)
 93 | 
 94 | 
 95 | def split_at(s, pos):
 96 |     if len(s) < pos:
 97 |         return s
 98 |     pos -= 10
 99 |     pos = max(0, pos)
100 |     for p in range(pos, min(pos + 20, len(s) - 1)):
101 |         if s[p] in [' ', '\n', '.', ',', ':', ';', '-']:
102 |             return s[:p] + '...'
103 |     return s[:pos] + '...'
104 | 
105 | 
def unwind_hashtags(text):
    """Replace each ``HASHTAG_REGEX`` match with its second group."""
    second_group = r'\2'
    return re.sub(HASHTAG_REGEX, second_group, text)
108 | 
109 | 
def process_isbns(isbnlikes):
    """Normalize ISBN-like strings into a sorted list of unique ids.

    Ten character codes that start with a letter and are otherwise
    alphanumeric are kept upper-cased. Everything else is canonicalized
    with ``isbnlib``; for valid ISBN-10/ISBN-13 values the other form is
    added as well when it can be derived.
    """
    collected = set()
    for candidate in isbnlikes:
        if not candidate:
            continue
        if candidate[0].isalpha() and len(candidate) == 10 and candidate[1:].isalnum():
            collected.add(candidate.upper())
            continue
        canonical = isbnlib.canonical(candidate)
        if not canonical:
            continue
        collected.add(canonical)
        if isbnlib.is_isbn10(canonical):
            converted = isbnlib.to_isbn13(canonical)
        elif isbnlib.is_isbn13(canonical):
            converted = isbnlib.to_isbn10(canonical)
        else:
            converted = None
        if converted:
            collected.add(converted)
    return sorted(collected)
129 | 


--------------------------------------------------------------------------------
/tgbot/views/telegram/progress_bar.py:
--------------------------------------------------------------------------------
  1 | import time
  2 | 
  3 | from izihawa_utils.exceptions import BaseError
  4 | from telethon.errors import MessageIdInvalidError
  5 | 
  6 | 
class ProgressBarLostMessageError(BaseError):
    # Raised by ProgressBar.send_message when editing the progress message
    # fails (falsy edit result or MessageIdInvalidError).
    pass
  9 | 
 10 | 
# Characters used by ProgressBar.render_progress to draw the bar cells.
bars = {
    'filled': '█',
    'empty': ' ',
}
 15 | 
 16 | 
 17 | def percent(done, total):
 18 |     return min(float(done) / total, 1.0)
 19 | 
 20 | 
 21 | class ProgressBar:
 22 |     def __init__(
 23 |         self,
 24 |         telegram_client,
 25 |         request_context,
 26 |         banner,
 27 |         header,
 28 |         tail_text,
 29 |         message=None,
 30 |         source=None,
 31 |         throttle_secs: float = 0.0,
 32 |         hard_throttle_secs: float = 10.0,
 33 |         last_call: float = 0.0,
 34 |         done_threshold_size: int = 10 * 1024 * 1024,
 35 |     ):
 36 |         self.telegram_client = telegram_client
 37 |         self.request_context = request_context
 38 |         self.banner = banner
 39 |         self.header = header
 40 |         self.tail_text = tail_text
 41 |         self.message = message
 42 |         self.source = source
 43 |         self.done = 0
 44 |         self.total = 1
 45 |         self.throttle_secs = throttle_secs
 46 |         self.hard_throttle_secs = hard_throttle_secs
 47 |         self.done_threshold_size = done_threshold_size
 48 | 
 49 |         self.previous_done = 0
 50 |         self.last_text = None
 51 |         self.last_call = last_call
 52 | 
 53 |     def share(self):
 54 |         if self.total:
 55 |             return f'{float(percent(self.done, self.total) * 100):.1f}%'
 56 |         else:
 57 |             return f'{float(self.done / (1024 * 1024)):.1f}Mb'
 58 | 
 59 |     def _set_progress(self, done, total):
 60 |         self.previous_done = self.done
 61 |         self.done = done
 62 |         self.total = total
 63 | 
 64 |     def set_source(self, source):
 65 |         self.source = source
 66 | 
 67 |     def render_banner(self):
 68 |         banner = self.banner.format(source=self.source)
 69 |         return f'`{self.header}\n{banner}`'
 70 | 
 71 |     async def render_progress(self):
 72 |         total_bars = 20
 73 |         progress_bar = ''
 74 |         if self.total:
 75 |             filled = int(total_bars * percent(self.done, self.total))
 76 |             progress_bar = '|' + filled * bars['filled'] + (total_bars - filled) * bars['empty'] + '| '
 77 | 
 78 |         tail_text = self.tail_text.format(source=self.source)
 79 |         return f'`{self.header}\n{progress_bar}{self.share().ljust(8)} {tail_text}`'
 80 | 
 81 |     def should_send(self, now, ignore_last_call):
 82 |         if ignore_last_call:
 83 |             return True
 84 |         if abs(now - self.last_call) > self.hard_throttle_secs:
 85 |             return True
 86 |         if abs(now - self.last_call) > self.throttle_secs and (self.done - self.previous_done) < self.done_threshold_size:
 87 |             return True
 88 |         return False
 89 | 
 90 |     async def send_message(self, text, ignore_last_call=False):
 91 |         now = time.time()
 92 |         if not self.should_send(now, ignore_last_call):
 93 |             return
 94 |         try:
 95 |             if not self.message:
 96 |                 self.message = await self.telegram_client.send_message(
 97 |                     self.request_context.chat['chat_id'],
 98 |                     text,
 99 |                 )
100 |             elif text != self.last_text:
101 |                 r = await self.message.edit(text)
102 |                 if not r:
103 |                     raise ProgressBarLostMessageError()
104 |         except MessageIdInvalidError:
105 |             raise ProgressBarLostMessageError()
106 |         self.last_text = text
107 |         self.last_call = now
108 |         return self.message
109 | 
110 |     async def show_banner(self):
111 |         return await self.send_message(self.render_banner(), ignore_last_call=True)
112 | 
113 |     async def callback(self, done, total, ignore_last_call=False):
114 |         self._set_progress(done, total)
115 |         return await self.send_message(await self.render_progress(), ignore_last_call=ignore_last_call)
116 | 


--------------------------------------------------------------------------------
/web/src/components/DjvuReader.vue:
--------------------------------------------------------------------------------
  1 | <template lang="pug">
  2 | .container.col-md-8.offset-md-2(v-if="is_rendering")
  3 |   loading-spinner(style="margin-top: 140px" :label="'rendering...'")
  4 | div(ref="wrapper")
  5 |   canvas(v-show="!is_rendering" id="djvu-reader" ref="reader")
  6 | </template>
  7 | 
  8 | <script lang="ts">
  9 | import {defineComponent, type PropType, toRaw} from 'vue'
 10 | 
 11 | import router from "@/router";
 12 | import ConnectivityIssuesView from "@/components/ConnectivityIssues.vue";
 13 | import LoadingSpinner from "@/components/LoadingSpinner.vue";
 14 | import DjVu from "@/components/djvu";
 15 | import Hammer from 'hammerjs'
 16 | 
 17 | 
 18 | export default defineComponent({
 19 |   name: 'DjvuReader',
 20 |   emits: ["update-anchor"],
 21 |   components: {ConnectivityIssuesView, LoadingSpinner},
 22 |   props: {
 23 |     anchor: undefined as PropType<string>,
 24 |     data: {
 25 |       type: undefined as PropType<ArrayBuffer>
 26 |     },
 27 |   },
 28 |   data() {
 29 |     let d = {
 30 |       is_rendering: false,
 31 |       hammer: undefined,
 32 |       current_page: 1,
 33 |       mounted: false,
 34 |       scale: 0.8,
 35 |       worker: undefined,
 36 |     }
 37 |     if (this.anchor !== undefined) {
 38 |       d.current_page = Number.parseInt(this.anchor);
 39 |     }
 40 |     return d;
 41 |   },
 42 |   async created() {
 43 |     this.worker = new DjVu.Worker();
 44 |     await toRaw(this.worker).createDocument(this.data, undefined);
 45 |     await this.render(undefined);
 46 |   },
 47 |   mounted() {
 48 |     this.hammer = Hammer(this.$refs.reader, { touchAction : 'pan-y' });
 49 |     document.addEventListener("keyup", this.key_listener);
 50 |     this.hammer.get('swipe').set({ direction: Hammer.DIRECTION_HORIZONTAL })
 51 |     this.hammer.get('pan').set({ direction: Hammer.DIRECTION_ALL });
 52 |     this.hammer.get('pinch').set({ enable: true });
 53 |     this.hammer.on('panstart panmove', (ev) => ev.preventDefault());
 54 |     this.hammer.on('swipeleft', () => this.next_page());
 55 |     this.hammer.on('swiperight', () => this.previous_page());
 56 |     this.mounted = true
 57 |   },
 58 |   beforeUnmount() {
 59 |     if (this.mounted) {
 60 |       document.removeEventListener("keyup", this.key_listener, {capture: true});
 61 |       this.hammer.off("swipeleft");
 62 |       this.hammer.off("swiperight");
 63 |       this.hammer.off("panstart");
 64 |       this.hammer.off("panmove");
 65 |     }
 66 |     this.mounted = false;
 67 |   },
 68 |   methods: {
 69 |     async render(old_page) {
 70 |       this.is_rendering = true;
 71 |       try {
 72 |         const resultImageData = await toRaw(this.worker).doc.getPage(this.current_page).getImageData().run();
 73 |         this.$refs.reader.width = resultImageData.width;
 74 |         this.$refs.reader.height = resultImageData.height;
 75 |         this.$refs.reader.style.width = `${100 * this.scale}%`
 76 |         this.$refs.reader.style['min-width'] = `${100 * this.scale}%`
 77 | 
 78 |         const context = this.$refs.reader.getContext('2d');
 79 |         context.putImageData(resultImageData, 0, 0);
 80 |         this.$emit("update-anchor", this.current_page.toString())
 81 |       } catch {
 82 |         if (old_page !== undefined) {
 83 |           this.current_page = old_page;
 84 |         }
 85 |       }
 86 |       finally {
 87 |         this.is_rendering = false;
 88 |       }
 89 |     },
 90 |     previous_page() {
 91 |       const old_page = this.current_page;
 92 |       this.current_page -= 1;
 93 |       this.render(old_page);
 94 |     },
 95 |     next_page() {
 96 |       const old_page = this.current_page;
 97 |       this.current_page += 1;
 98 |       this.render(old_page);
 99 |     },
100 |     key_listener(event) {
101 |       event.preventDefault();
102 |       if (event.key == "ArrowLeft") {
103 |         this.previous_page()
104 |       } else if (event.key == "ArrowRight") {
105 |         this.next_page()
106 |       } else if (event.key === "Escape") {
107 |         router.back();
108 |         return;
109 |       }
110 |     },
111 |   }
112 | })
113 | </script>
114 | <style lang="scss" scoped>
115 | #djvu-reader {
116 |   display: block;
117 |   margin-left: auto;
118 |   margin-right: auto;
119 |   min-width: 100%;
120 |   width: 100%;
121 | }
122 | </style>
123 | 


--------------------------------------------------------------------------------
/web/src/components/EpubReader.vue:
--------------------------------------------------------------------------------
  1 | <template lang="pug">
  2 | div.inversion-filter(id="epub-reader" ref="reader")
  3 | </template>
  4 | 
  5 | <script lang="ts">
  6 | import {defineComponent, type PropType} from 'vue'
  7 | 
  8 | import ePub from "epubjs";
  9 | import router from "@/router";
 10 | import ConnectivityIssuesView from "@/components/ConnectivityIssues.vue";
 11 | import LoadingSpinner from "@/components/LoadingSpinner.vue";
 12 | import {get_label} from "@/translations";
 13 | import Hammer from 'hammerjs'
 14 | 
 15 | 
 16 | export default defineComponent({
 17 |   name: 'EpubReader',
 18 |   emits: ["update-anchor"],
 19 |   components: {ConnectivityIssuesView, LoadingSpinner},
 20 |   props: {
 21 |     anchor: {
 22 |       type: undefined as PropType<string> | undefined
 23 |     },
 24 |     data: {
 25 |       type: undefined as PropType<ArrayBuffer> | undefined
 26 |     },
 27 |   },
 28 |   data() {
 29 |     return {
 30 |       downloading_status: get_label("loading") + "...",
 31 |       error: undefined,
 32 |       mounted: false,
 33 |       rendition: undefined,
 34 |     }
 35 |   },
 36 |   async created() {
 37 |     const book = ePub();
 38 |     await book.open(this.data)
 39 |     await book.ready;
 40 | 
 41 |     this.rendition = book.renderTo(
 42 |         "epub-reader", {
 43 |           flow: "paginated",
 44 |           method: "continuous",
 45 |           width: "100%",
 46 |           height: "100%",
 47 |           resizeOnOrientationChange: true,
 48 |           allowScriptedContent: true,
 49 |         });
 50 | 
 51 |     this.rendition.on("locationChanged", (e) => {
 52 |       let anchor = undefined;
 53 |       if (e !== undefined) {
 54 |         anchor = e.start;
 55 |       }
 56 |       this.$emit('update-anchor', anchor);
 57 |     });
 58 |     this.rendition.on("rendered", (e) => {
 59 |       const iframe = document.querySelector('iframe').contentDocument;
 60 |       const hammer = Hammer(iframe.body);
 61 |       hammer.get('swipe').set({ direction: Hammer.DIRECTION_HORIZONTAL })
 62 |       hammer.get('pan').set({ direction: Hammer.DIRECTION_ALL });
 63 |       hammer.get('pinch').set({ enable: true });
 64 |       hammer.on('panstart panmove', (ev) => ev.preventDefault());
 65 |       hammer.on('swipeleft', () => this.rendition.next());
 66 |       hammer.on('swiperight', () => this.rendition.prev());
 67 |       iframe.addEventListener("keyup", this.key_listener, {capture: true});
 68 |     });
 69 |     this.setup_theme_processor();
 70 |     await this.rendition.display(this.anchor);
 71 |   },
 72 |   mounted() {
 73 |     document.addEventListener("keyup", this.key_listener, {capture: true})
 74 |     this.mounted = true
 75 |   },
 76 |   beforeUnmount() {
 77 |     if (this.mounted) {
 78 |       document.removeEventListener("keyup", this.key_listener, {capture: true});
 79 |       this.$refs.reader.removeEventListener("keyup", this.key_listener, {capture: true});
 80 |     }
 81 |     this.mounted = false;
 82 |   },
 83 |   methods: {
 84 |     setup_theme_processor() {
 85 |       // Color then inverted, so we have taken our main color and inverted it.
 86 |       this.rendition.themes.register("light", {});
 87 |       this.rendition.themes.register("dark",
 88 |           {
 89 |             "html": {"background-color": "rgb(216, 218, 222)"},
 90 |           });
 91 |       this.rendition.themes.select(window.matchMedia('(prefers-color-scheme: dark)').matches
 92 |           ? 'dark'
 93 |           : 'light');
 94 |       let that = this;
 95 |       window
 96 |           .matchMedia('(prefers-color-scheme: dark)')
 97 |           .addEventListener('change', function updateTheme() {
 98 |             that.rendition.themes.select(window.matchMedia('(prefers-color-scheme: dark)').matches
 99 |                 ? 'dark'
100 |                 : 'light');
101 |           });
102 |     },
103 |     key_listener(event) {
104 |       event.preventDefault();
105 |       if (event.key == "ArrowLeft") {
106 |         this.rendition.prev();
107 |       } else if (event.key == "ArrowRight") {
108 |         this.rendition.next();
109 |       } else if (event.key === "Escape") {
110 |         router.back();
111 |         return;
112 |       }
113 |     }
114 |   }
115 | })
116 | </script>
117 | <style lang="scss" scoped>
118 | #epub-reader {
119 |   position: fixed;
120 |   min-width: 100%;
121 |   width: 100%;
122 |   min-height: calc(100% - 126px);
123 |   height: calc(100% - 126px);
124 | }
125 | </style>
126 | 


--------------------------------------------------------------------------------
/tgbot/handlers/vote.py:
--------------------------------------------------------------------------------
  1 | import hashlib
  2 | import re
  3 | 
  4 | import telethon
  5 | from telethon import events
  6 | 
  7 | from library.telegram.base import RequestContext
  8 | 
  9 | from ..app.librarian_service import extract_internal_id
 10 | from .base import BaseCallbackQueryHandler
 11 | 
 12 | 
 13 | def remove_from_list(lst, value):
 14 |     try:
 15 |         lst.remove(value)
 16 |     except ValueError:
 17 |         pass
 18 | 
 19 | 
 20 | class VoteHandler(BaseCallbackQueryHandler):
 21 |     is_group_handler = True
 22 |     filter = events.CallbackQuery(pattern='^/vote_([ic])$')
 23 |     writing_handler = True
 24 | 
 25 |     votes_regexp = re.compile(r'Correct:(?P<correct>\s*.*)\nIncorrect:(?P<incorrect>\s*.*)')
 26 |     doi_regexp = re.compile(r'\*\*DOI:\*\* \[(?P<doi>.*)]\(.*\)')
 27 |     salt = 'y4XF-OsYl3M'
 28 | 
 29 |     def parse_pattern(self, event: events.ChatAction):
 30 |         vote = event.pattern_match.group(1).decode()
 31 |         return vote
 32 | 
 33 |     async def handler(self, event: events.ChatAction, request_context: RequestContext):
 34 |         user_id = event.query.user_id
 35 |         if user_id not in self.application.config['librarian']['moderators']:
 36 |             return await event.answer('You cannot vote')
 37 |         if self.application.is_read_only():
 38 |             return await event.answer('Read-only mode, try to vote later')
 39 | 
 40 |         vote = self.parse_pattern(event)
 41 | 
 42 |         request_context.add_default_fields(mode='vote')
 43 | 
 44 |         message = await event.get_message()
 45 |         text = message.text
 46 |         current_votes = self.votes_regexp.search(text)
 47 |         librarian_hash = hashlib.md5(f"{user_id}-{self.salt}".encode()).hexdigest()[-8:]
 48 | 
 49 |         request_context.statbox(
 50 |             action='vote',
 51 |             vote=vote,
 52 |         )
 53 | 
 54 |         sep = ', '
 55 |         correct_votes = []
 56 |         if correct_votes_str := current_votes.group('correct').strip():
 57 |             correct_votes = correct_votes_str.split(sep)
 58 |         incorrect_votes = []
 59 |         if incorrect_votes_str := current_votes.group('incorrect').strip():
 60 |             incorrect_votes = incorrect_votes_str.split(sep)
 61 | 
 62 |         remove_from_list(correct_votes, librarian_hash)
 63 |         remove_from_list(incorrect_votes, librarian_hash)
 64 | 
 65 |         if vote == 'c':
 66 |             correct_votes.append(librarian_hash)
 67 |         else:
 68 |             incorrect_votes.append(librarian_hash)
 69 | 
 70 |         span = current_votes.span('incorrect')
 71 |         text = text[:span[0]] + ' ' + sep.join(incorrect_votes) + text[span[1]:]
 72 |         span = current_votes.span('correct')
 73 |         text = text[:span[0]] + ' ' + sep.join(correct_votes) + text[span[1]:]
 74 |         await message.edit(text)
 75 | 
 76 |         if (
 77 |             len(correct_votes) - len(incorrect_votes) >= self.application.config['librarian']['required_votes']
 78 |             or user_id in self.application.config['librarian']['super_moderators'] and vote == 'c'
 79 |         ):
 80 |             await message.edit(text, buttons=None)
 81 |             if internal_id := extract_internal_id(text):
 82 |                 pass
 83 |             elif doi_re := self.doi_regexp.search(text):
 84 |                 internal_id = f'id.dois:{doi_re.group("doi").strip().lower()}'
 85 |             else:
 86 |                 raise ValueError()
 87 |             field, value = internal_id.split(':', 1)
 88 |             document = await self.application.summa_client.get_one_by_field_value('nexus_science', field, value)
 89 |             file = await message.download_media(file=bytes)
 90 | 
 91 |             request_context.statbox(
 92 |                 action='pinning',
 93 |                 internal_id=internal_id,
 94 |                 filesize=len(file),
 95 |             )
 96 | 
 97 |             await self.application.file_flow.pin_add(document, file, with_commit=True)
 98 |             await self.application.database.add_approve(message.id, 1)
 99 |             reply_message = await message.get_reply_message()
100 |             if reply_message:
101 |                 try:
102 |                     await reply_message.delete()
103 |                 except telethon.errors.rpcerrorlist.MessageDeleteForbiddenError:
104 |                     pass
105 |             await event.delete()
106 |         else:
107 |             await message.edit(text)
108 | 


--------------------------------------------------------------------------------
/web/src/components/DocumentSnippet.vue:
--------------------------------------------------------------------------------
  1 | <template lang="pug">
  2 | div
  3 |   .row
  4 |     .col-3.col-lg-2(v-if="with_cover && cover")
  5 |       a(:href="item_link()")
  6 |         img.mb-3.img-thumbnail(width="100" :src="cover")
  7 |     .col-9.col-lg-10
  8 |       a.text-decoration-none.h5(v-if="with_large_caption" v-html="prepared_title" :href="item_link()")
  9 |       a.text-decoration-none.h6(v-else v-html="prepared_title" :href="item_link()")
 10 |       .mt-1(v-html="coordinates")
 11 |       .text-secondary(v-if="with_extras")
 12 |         span(v-html="extras")
 13 |         span(v-if="has_bookmark") &nbsp;|&nbsp;
 14 |           i.bi-bookmark-check-fill
 15 |   .row
 16 |     .col-12
 17 |       .mt-2(v-if="with_abstract && prepared_snippets", v-html="prepared_snippets")
 18 |       .mt-2(v-if="with_tags")
 19 |         tags-list(:tags="small_tags")
 20 | </template>
 21 | 
 22 | <script lang="ts">
 23 | import { defineComponent } from 'vue'
 24 | 
 25 | import BaseDocument from "@/components/BaseDocument.vue";
 26 | import { user_db } from "@/database";
 27 | import {decode_html, extract_text_from_html, remove_unpaired_escaped_tags} from "@/utils";
 28 | 
 29 | export default defineComponent({
 30 |   name: 'DocumentSnippet',
 31 |   components: { BaseDocument },
 32 |   extends: BaseDocument,
 33 |   props: {
 34 |     with_abstract: {
 35 |       type: Boolean,
 36 |       required: false,
 37 |       default: true
 38 |     },
 39 |     with_cover: {
 40 |       type: Boolean,
 41 |       required: false,
 42 |       default: false
 43 |     },
 44 |     with_extras: {
 45 |       type: Boolean,
 46 |       required: false,
 47 |       default: true
 48 |     },
 49 |     with_large_caption: {
 50 |       type: Boolean,
 51 |       required: false,
 52 |       default: false
 53 |     },
 54 |     with_tags: {
 55 |       type: Boolean,
 56 |       required: false,
 57 |       default: true
 58 |     },
 59 |     snippets: {
 60 |       type: Object
 61 |     }
 62 |   },
 63 |   data () {
 64 |     return {
 65 |       has_bookmark: false,
 66 |     }
 67 |   },
 68 |   async created () {
 69 |     this.has_bookmark = await user_db.has_bookmark(
 70 |       "nexus_science",
 71 |       this.id_query()
 72 |     )
 73 |   },
 74 |   computed: {
 75 |     prepared_snippets () {
 76 |       if (!this.document.abstract) {
 77 |         return null
 78 |       }
 79 |       let abstract = "";
 80 |       if (this.snippets.abstract) {
 81 |         abstract = this.snippets.abstract.html;
 82 |       }
 83 |       if (abstract.length === 0) {
 84 |         abstract = this.document.abstract.substring(0, 400)
 85 |         if (this.document.abstract.length > 400) {
 86 |           abstract += '...'
 87 |         }
 88 |         abstract = abstract.replace( /(<([^>]+)>)/ig, '');
 89 |         abstract = abstract.replace(/&lt;.*?&gt;/g, "")
 90 |       } else {
 91 |         const encoder = new TextEncoder()
 92 |         const original_length = encoder.encode(this.document.abstract).length
 93 |         const snippet_length =
 94 |           this.snippets.abstract.fragment.length
 95 | 
 96 |         if (original_length > snippet_length) {
 97 |           abstract += '...'
 98 |         }
 99 |         const full_decoded_abstract = extract_text_from_html(this.document.abstract)
100 |         abstract = abstract.replace(/&lt;.*?&gt;/g, "");
101 |         const snippet_decoded_abstract = extract_text_from_html(abstract)
102 |         if (full_decoded_abstract.substring(0, 32) !== snippet_decoded_abstract.substring(0, 32)) {
103 |           abstract = '...' + abstract
104 |         }
105 |       }
106 |       return abstract
107 |     },
108 |     prepared_title () {
109 |       let title = (this.document.title || 'No title').slice(0, this.max_title_length)
110 |       if (
111 |         this.snippets &&
112 |         this.snippets.title &&
113 |         this.snippets.title.html &&
114 |         this.snippets.title.html.length > 0
115 |       ) {
116 |         title = this.snippets.title.html
117 |       }
118 |       const encoder = new TextEncoder()
119 |       const original_length = encoder.encode(this.document.title).length
120 |       if (original_length > this.max_title_length) {
121 |         title += '...'
122 |       }
123 |       return decode_html(remove_unpaired_escaped_tags(title))
124 |     },
125 |   }
126 | })
127 | </script>
128 | 
129 | <style scoped lang="scss">
130 | li {
131 |   padding-bottom: 15px;
132 |   padding-left: 0;
133 |   &:after {
134 |     content: none;
135 |   }
136 | }
137 | </style>
138 | 


--------------------------------------------------------------------------------
/tgbot/handlers/mlt.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | 
 3 | from izihawa_utils.common import filter_none
 4 | from stc_geck.advices import get_default_scorer
 5 | from telethon import events
 6 | 
 7 | from library.telegram.base import RequestContext
 8 | from library.telegram.common import close_button
 9 | from tgbot.translations import t
10 | from tgbot.views.telegram.base_holder import BaseTelegramDocumentHolder
11 | 
12 | from ..views.telegram.common import (
13 |     recode_base64_to_base36,
14 |     remove_button,
15 | )
16 | from .base import BaseHandler
17 | 
18 | 
class MltHandler(BaseHandler):
    """Handle ``/m_<...>`` and ``/n_<...>`` callbacks by listing documents similar to a source one."""
    filter = events.CallbackQuery(pattern='^/(m|n)_(.*)')
    fail_as_reply = False

    def parse_pattern(self, event: events.ChatAction):
        """Return the ``(field, value)`` pair that identifies the source document.

        For ``m`` the payload is recoded from base64 to base36 and looked up
        by ``links.cid``; for ``n`` the payload is split on its first ``:``.
        """
        command = event.pattern_match.group(1).decode()
        if command == 'm':
            cid = recode_base64_to_base36(event.pattern_match.group(2).decode())
            return 'links.cid', cid
        else:
            internal_id = event.pattern_match.group(2).decode()
            return internal_id.split(':', 1)

    async def handler(self, event: events.ChatAction, request_context: RequestContext):
        """Search for documents similar to the requested one and show them.

        A "searching" placeholder message is sent first and later edited into
        the result list.
        """
        field, value = self.parse_pattern(event)

        request_context.add_default_fields(mode='mlt', field=field, value=value)
        request_context.statbox(action='view')

        prefetch_message = await self.application.get_telegram_client(request_context.bot_name).send_message(
            event.chat,
            t("SEARCHING", request_context.chat['language'])
        )

        source_document = await self.application.summa_client.get_one_by_field_value('nexus_science', field, value)

        if not source_document:
            return await event.reply(t("OUTDATED_VIEW_LINK", request_context.chat['language']))

        document_dump = filter_none({
            'title': source_document.get('title'),
            'abstract': source_document.get('abstract'),
            'tags': source_document.get('tags'),
            'languages': source_document.get('languages'),
        })

        subqueries = [{
            'occur': 'should',
            'query': {'more_like_this': {
                'boost': '3.0',
                'max_query_terms': 64,
                'min_term_frequency': 1,
                'min_doc_frequency': 1,
                'max_doc_frequency': 1_000_000,
                'document': json.dumps(document_dump)
            }}
        }]

        # Restrict results to the source's kind of document. ``.get`` lets a
        # document without a ``type`` fall through to the all-types default
        # instead of raising KeyError.
        source_type = source_document.get('type')
        if source_type in {'book', 'edited-book', 'monograph', 'reference-book'}:
            requested_type = 'type:book type:"edited-book" type:monograph type:"reference-book"'
        elif source_type == 'journal-article':
            requested_type = 'type:"journal-article"'
        elif source_type == 'proceedings-article':
            requested_type = 'type:"proceedings-article"'
        else:
            requested_type = 'type:book type:"edited-book" type:monograph type:"reference-book" type:"journal-article"'

        source_holder = BaseTelegramDocumentHolder(source_document)

        documents = await self.application.summa_client.search_documents({
            'index_alias': 'nexus_science',
            'query': {'boolean': {'subqueries': [
                {'occur': 'must', 'query': {'boolean': {'subqueries': subqueries}}},
                {'occur': 'must', 'query': {'match': {'value': requested_type}}},
                # The source document itself is excluded from the results.
                {'occur': 'must_not', 'query': {'match': {'value': source_holder.get_internal_id()}}}
            ]}},
            'collectors': [{'top_docs': {'limit': 5, 'scorer': get_default_scorer(self.application.search_request_builder.profile)}}],
        })

        # The same flag applies to every result, so compute it once.
        with_librarian_service = bool(self.application.librarian_service) and not self.application.is_read_only()
        serp_elements = [
            BaseTelegramDocumentHolder(document).base_render(
                request_context,
                with_librarian_service=with_librarian_service,
            )
            for document in documents
        ]
        serp = '\n\n'.join(serp_elements)
        serp = f'**Similar To: {source_holder.get_title_with_link(bot_name=request_context.bot_name)}**\n\n{serp}'
        await remove_button(event, '🖲', and_empty_too=True)
        return await prefetch_message.edit(serp, buttons=[close_button()])
96 | 


--------------------------------------------------------------------------------
/tgbot/configs/base.yaml:
--------------------------------------------------------------------------------
  1 | ---
  2 | 
  3 | application:
  4 |   banned_chat_ids: []
  5 |   bot_version: 2.0.0
  6 |   btc_donate_address: '357vJAFsYeCtLU36MYEgaDueg34rr5ajCy'
  7 |   # List of chat IDs that are allowed to bypass maintenance mode
  8 |   bypass_maintenance: []
  9 |   data_directory: /usr/lib/stc-tgbot
 10 |   # Debugging mode
 11 |   debug: true
 12 |   # URL of picture to show in error message
 13 |   error_picture_url:
 14 |   eth_donate_address: '0x199bECe965e4e1e2fE3065d3F551Ebe8520AC555'
 15 |   # Enabled index_aliases
 16 |   # All users (except `bypass_maintenance` ones) will get UPGRADE_MAINTENANCE message in response
 17 |   is_maintenance_mode: false
 18 |   # Set to true for restricting writing operations (such as submitting files or requesting new ones)
 19 |   is_read_only: true
 20 |   # Default page size for SERP
 21 |   page_size: 5
 22 |   # Length of generated Request-ID used for tracking requests across all backends
 23 |   request_id_length: 12
 24 |   # Length of generated Session-ID used in commands to clue user sessions
 25 |   session_id_length: 8
 26 |   sol_donate_address: 'FcJG17cEyG8LnNkdJg8HCAQQZKxqpwTupD9fc3GXMqxD'
 27 |   # URL of picture to show in the message about queries with invalid syntax
 28 |   too_difficult_picture_url:
 29 |   # URL of picture to show in maintenance message
 30 |   upgrade_maintenance_picture_url:
 31 |   xmr_donate_address: '42HZx5Cg1uQ2CtCrq7QabP23BN7gBrGu6U6QumkMmR4bKS61gcoP8xyNzP5cJCbjac9yaWFhLsDmM3adMWyBKBXn1d9WiUb'
 32 |   xrp_donate_address: 'rw2ciyaNshpHe7bCHo4bRWq6pqqynnWKQg'
 33 |   xrp_donate_tag: '1968122674'
 34 |   # Number of async routines for starting all bots
 35 |   workers: 8
 36 | # File Flow service for storing files. Cannot be used in light mode
 37 | file_flow:
 38 |   enabled: false
 39 | ipfs:
 40 |   # Base URL for IPFS Gateway. You can choose any public gateway, such as https://dweb.link/ or https://ipfs.io/
 41 |   # or setup your own gateway locally and set it with http://127.0.0.1:8080
 42 |   http:
 43 |     base_url: http://ipfs:8080
 44 | # Configure Librarian service for uploading files. Cannot be used in light mode
 45 | librarian:
 46 |   # Credentials of admin account for managing Aaron's groups
 47 |   admin:
 48 |     app_id:
 49 |     app_hash:
 50 |     phone:
 51 |   # Credentials of bot account for posting messages
 52 |   bot:
 53 |     app_id:
 54 |     app_hash:
 55 |     bot_name:
 56 |     bot_token:
 57 |   enabled: false
 58 |   group_name: 'nexus_aaron'
 59 |   moderators: []
 60 |   # How large the difference between correct and incorrect votes must be for approval
 61 |   required_votes: 2
 62 |   # Can solely approve uploaded items
 63 |   super_moderators: []
 64 | log_path: /var/log/stc-tgbot
 65 | # Configure Metadata retriever for ingesting new files. Cannot be used in light mode
 66 | metadata_retriever:
 67 |   # Configuration of Crossref API Client
 68 |   crossref_api:
 69 |     timeout: 15
 70 |     user_agent: anon@example.com
 71 |   enabled: false
 72 |   # Index for committing changes
 73 |   index_alias: nexus_science
 74 | reddit:
 75 |   url: https://reddit.com/r/science_nexus
 76 | telegram:
 77 |   # Enabled handlers
 78 |   command_handlers:
 79 |     - tgbot.handlers.aboutus.AboutusHandler
 80 |     - tgbot.handlers.close.CloseHandler
 81 |     - tgbot.handlers.cybrex.CybrexHandler
 82 |     - tgbot.handlers.download.DownloadHandler
 83 |     - tgbot.handlers.howtohelp.HowToHelpHandler
 84 |     - tgbot.handlers.help.HelpHandler
 85 |     - tgbot.handlers.q.QHandler
 86 |     - tgbot.handlers.report.ReportHandler
 87 |     - tgbot.handlers.riot.RiotHandler
 88 |     - tgbot.handlers.riot.RiotBFHandler
 89 |     - tgbot.handlers.riot.RiotOldHandler
 90 |     - tgbot.handlers.riot.RiotCredHandler
 91 |     - tgbot.handlers.roll.RollHandler
 92 |     - tgbot.handlers.shortlink.ShortlinkHandler
 93 |     - tgbot.handlers.start.StartHandler
 94 |     - tgbot.handlers.stop.StopHandler
 95 |     - tgbot.handlers.mlt.MltHandler
 96 |     - tgbot.handlers.view.ViewHandler
 97 |     - tgbot.handlers.noop.NoopHandler
 98 |   # Channel that will be shown in /help, /howtohelp and in promotions
 99 |   related_channel: nexus_search
100 |   search_handlers:
101 |     - tgbot.handlers.search.SearchHandler
102 |     - tgbot.handlers.search.SearchEditHandler
103 |     - tgbot.handlers.search.SearchPagingHandler
104 |     - tgbot.handlers.search.InlineSearchHandler
105 | summa:
106 |   endpoint: 127.0.0.0:10082
107 |   embed:
108 |     enabled: true
109 |     ipfs_data_directory: /ipns/libstc.cc/data/
110 | twitter:
111 |   contact_url: https://twitter.com/the_superpirate
112 | 


--------------------------------------------------------------------------------
/web/src/components/download-progress.ts:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * @desc downloadProgress initiator
  3 |  * @param files {Array}
  4 |  * @event beforeLoading
  5 |  * @event afterLoading
  6 |  * @event progress
  7 |  * @return {downloadProgressObject}
  8 |  **/
  9 | export default class DownloadProgress {
 10 |   /**
 11 |    * @desc downloadProgress constructor
 12 |    * @param files {Array}
 13 |    **/
 14 |   files: string[]
 15 |   percentages: {}
 16 |   percentage: number
 17 |   events: {
 18 |     beforeLoading: Event
 19 |     afterLoading: any
 20 |     progress: any
 21 |   }
 22 | 
 23 |   promises: Array<Promise<any>>
 24 | 
 25 |   constructor (files) {
 26 |     this.files = files
 27 |     this.percentages = {}
 28 |     this.percentage = 0
 29 |     this.events = {
 30 |       beforeLoading: new Event('beforeLoading'),
 31 |       afterLoading: function (response, url) {
 32 |         return new CustomEvent('afterLoading', {
 33 |           detail: { response, url }
 34 |         })
 35 |       },
 36 |       progress: function (percentage) {
 37 |         return new CustomEvent('progress', { detail: percentage })
 38 |       }
 39 |     }
 40 |     this.promises = []
 41 |   }
 42 | 
 43 |   /**
 44 |    * @desc the callback that gets called on update progress
 45 |    * @param url {String}
 46 |    * @param oEvent {Object}
 47 |    **/
 48 |   _downloadProgressUpdateProgress (url, oEvent) {
 49 |     const percentComplete = oEvent.lengthComputable
 50 |       ? oEvent.loaded / oEvent.total
 51 |       : oEvent.loaded /
 52 |         (oEvent.target.getResponseHeader('x-decompressed-content-length') || oEvent.target.getResponseHeader('content-length'))
 53 |     let totalPercentage = 0
 54 |     let key
 55 |     this.percentages[url] = percentComplete
 56 |     for (key in this.percentages) {
 57 |       totalPercentage += this.percentages[key]
 58 |     }
 59 |     this.percentage = (totalPercentage / this.files.length) * 100
 60 |     document.dispatchEvent(this.events.progress(this.percentage))
 61 |   }
 62 | 
 63 |   /**
 64 |    * @desc gets the target file and sends the responseText back
 65 |    * @param index {Number}
 66 |    **/
 67 |   async initiate_download (index) {
 68 |     const that = this
 69 |     return await new Promise(function (resolve, reject) {
 70 |       const xhr = new XMLHttpRequest()
 71 |       const url = that.files[index]
 72 |       xhr.addEventListener(
 73 |         'progress',
 74 |         that._downloadProgressUpdateProgress.bind(that, url)
 75 |       )
 76 |       xhr.responseType = "arraybuffer";
 77 |       xhr.open('GET', url)
 78 |       xhr.onreadystatechange = function (index) {
 79 |         if (xhr.status === 200 && xhr.readyState === 4) {
 80 |           document.dispatchEvent(
 81 |             that.events.afterLoading(xhr.response, that.files[index])
 82 |           )
 83 |         }
 84 |       }.bind(that, index)
 85 |       xhr.onload = function () {
 86 |         if (xhr.status >= 200 && xhr.status < 300) {
 87 |           resolve(xhr.response)
 88 |         } else {
 89 |           reject({
 90 |             status: xhr.status,
 91 |             statusText: xhr.statusText
 92 |           })
 93 |         }
 94 |       }
 95 |       xhr.onerror = function () {
 96 |         reject({
 97 |           status: xhr.status,
 98 |           statusText: xhr.statusText
 99 |         })
100 |       }
101 |       xhr.send()
102 |     })
103 |   }
104 | 
105 |   /**
106 |    * @desc attaches the callback to the given even
107 |    * @param event {Object}
108 |    * @param callback {Function}
109 |    * @return {downloadProgressObject}
110 |    **/
111 |   on (event, callback) {
112 |     document.addEventListener(event, callback, false)
113 |     return this
114 |   }
115 | 
116 |   /**
117 |    * @desc initializes the loading
118 |    * @return {downloadProgressObject}
119 |    **/
120 |   init () {
121 |     document.dispatchEvent(this.events.beforeLoading)
122 |     let i = 0
123 |     for (; i < this.files.length; i++) {
124 |       this.percentages[this.files[i]] = 0
125 |       this.promises.push(this.initiate_download(i))
126 |     }
127 |     return this
128 |   }
129 | }
130 | 
/**
 * @desc downloads all `files` in parallel and mirrors the overall progress
 * into `progress_bar.value` (as a percentage string, reset to `undefined`
 * once a file has finished loading)
 * @param files {Array} URLs to download
 * @param progress_bar {Object} any object with a writable `value` property
 * @return {Promise<Array>} the responses, in the order of `files`
 **/
export async function tracked_download (files, progress_bar) {
  const dp = new DownloadProgress(files)
  const on_progress = function (e) {
    // The percentage is Infinity/NaN while the total size is still unknown.
    const downloaded = Number.isFinite(e.detail) ? e.detail : 0
    progress_bar.value = `${downloaded.toFixed(0)}%`
  }
  const on_after_loading = function () {
    progress_bar.value = undefined
  }
  dp.on('progress', on_progress).on('afterLoading', on_after_loading)
  dp.init()
  try {
    return await Promise.all(dp.promises)
  } finally {
    // `DownloadProgress.on` registers on `document`; without this cleanup
    // every call would leave two listeners behind that keep updating a
    // stale progress bar during later downloads.
    document.removeEventListener('progress', on_progress, false)
    document.removeEventListener('afterLoading', on_after_loading, false)
  }
}
146 | 


--------------------------------------------------------------------------------
/web/src/components/PdfReader.vue:
--------------------------------------------------------------------------------
  1 | <template lang="pug">
  2 | .container.col-md-8.offset-md-2(v-if="is_rendering")
  3 |   loading-spinner(style="margin-top: 140px" :label="'rendering...'")
  4 | canvas(v-show="!is_rendering" id="pdf-reader" ref="reader")
  5 | </template>
  6 | 
  7 | <script lang="ts">
  8 | import {defineComponent, type PropType, toRaw} from 'vue'
  9 | 
 10 | import router from "@/router";
 11 | import ConnectivityIssuesView from "@/components/ConnectivityIssues.vue";
 12 | import LoadingSpinner from "@/components/LoadingSpinner.vue";
 13 | import * as PdfJs from "pdfjs-dist";
 14 | import Hammer from 'hammerjs'
 15 | 
// Tell pdf.js where to load its worker script from. The location is resolved
// relative to this module through `import.meta.url`.
PdfJs.GlobalWorkerOptions.workerSrc = new URL(
    '~/pdfjs-dist/build/pdf.worker.js',
    import.meta.url
);
 20 | 
 21 | export default defineComponent({
 22 |   name: 'PdfReader',
 23 |   emits: ["update-anchor"],
 24 |   components: {ConnectivityIssuesView, LoadingSpinner},
 25 |   props: {
 26 |     anchor: undefined as PropType<string>,
 27 |     data: {
 28 |       type: undefined as PropType<ArrayBuffer>
 29 |     },
 30 |   },
 31 |   data() {
 32 |     let d = {
 33 |       is_rendering: false,
 34 |       current_page: 1,
 35 |       pdf_document: undefined,
 36 |       mounted: false,
 37 |       scale: 0.8,
 38 |       hammer: undefined,
 39 |     }
 40 |     if (this.anchor !== undefined) {
 41 |       d.current_page = Number.parseInt(this.anchor);
 42 |     }
 43 |     return d;
 44 |   },
 45 |   async created() {
 46 |     this.pdf_document = await PdfJs.getDocument(this.data).promise;
 47 |     await this.render(undefined);
 48 |   },
 49 |   mounted() {
 50 |     this.hammer = Hammer(this.$refs.reader, { touchAction : 'pan-y' });
 51 |     document.addEventListener("keyup", this.key_listener);
 52 |     this.hammer.get('swipe').set({ direction: Hammer.DIRECTION_HORIZONTAL })
 53 |     this.hammer.get('pan').set({ direction: Hammer.DIRECTION_ALL });
 54 |     this.hammer.get('pinch').set({ enable: true });
 55 |     this.hammer.on('panstart panmove', (ev) => ev.preventDefault());
 56 |     this.hammer.on('swipeleft', () => this.next_page());
 57 |     this.hammer.on('swiperight', () => this.previous_page());
 58 |     this.mounted = true
 59 |   },
 60 |   beforeUnmount() {
 61 |     if (this.mounted) {
 62 |       document.removeEventListener("keyup", this.key_listener, {capture: true});
 63 |       this.hammer.off("swipeleft");
 64 |       this.hammer.off("swiperight");
 65 |       this.hammer.off("panstart");
 66 |       this.hammer.off("panmove");
 67 |     }
 68 |     this.mounted = false;
 69 |   },
 70 |   methods: {
 71 |     async render(old_page) {
 72 |       if (this.is_rendering) {
 73 |         return;
 74 |       }
 75 |       this.is_rendering = true;
 76 |       try {
 77 |         const pdf_document = toRaw(this.pdf_document)
 78 |         const page = await pdf_document.getPage(this.current_page);
 79 |         const viewport = page.getViewport({ scale: window.devicePixelRatio * this.scale * 2, });
 80 |         this.$refs.reader.width = viewport.width;
 81 |         this.$refs.reader.height = viewport.height;
 82 |         this.$refs.reader.style.width = `${100 * this.scale}%`
 83 |         this.$refs.reader.style['min-width'] = `${100 * this.scale}%`
 84 |         const context = this.$refs.reader.getContext('2d');
 85 |         var renderContext = {
 86 |           canvasContext: context,
 87 |           viewport: viewport
 88 |         };
 89 |         page.render(renderContext);
 90 |         this.$emit("update-anchor", this.current_page.toString())
 91 |       } catch (e) {
 92 |         if (old_page !== undefined) {
 93 |           this.current_page = old_page;
 94 |         }
 95 |       }
 96 |       finally {
 97 |         this.is_rendering = false;
 98 |       }
 99 |     },
100 |     previous_page() {
101 |       const old_page = this.current_page;
102 |       this.current_page -= 1;
103 |       this.render(old_page);
104 |     },
105 |     next_page() {
106 |       const old_page = this.current_page;
107 |       this.current_page += 1;
108 |       this.render(old_page);
109 |     },
110 |     key_listener(event) {
111 |       event.preventDefault();
112 |       if (event.key == "ArrowLeft") {
113 |         this.previous_page()
114 |       } else if (event.key == "ArrowRight") {
115 |         this.next_page()
116 |       } else if (event.key === "Escape") {
117 |         router.back();
118 |         return;
119 |       }
120 |     },
121 |   },
122 | })
123 | </script>
124 | <style lang="scss" scoped>
125 | #pdf-reader {
126 |   display: block;
127 |   margin-left: auto;
128 |   margin-right: auto;
129 |   min-width: 100%;
130 |   width: 100%;
131 | }
132 | </style>
133 | 


--------------------------------------------------------------------------------
/web/src/services/search/query-processor.ts:
--------------------------------------------------------------------------------
  1 | import { grpc_web } from "summa-wasm";
  2 | 
// Options that shape the search request built by `default_queries` and
// `default_collectors`.
interface QueryConfig {
    // page number; the collector offset is `(page - 1) * page_size`
    page: number
    // results per page; `default_collectors` falls back to 5 when absent
    page_size?: number
    // passed to the `top_docs` collector as `fields`
    fields?: string[]
    // not read by the code in this file
    index_name?: string
    // when set, adds a term filter on the `languages` field
    language?: string
    // when true, results are ordered by `issued_at`
    is_date_sorting_enabled: boolean
    // when true, a reservoir sampling collector replaces `top_docs`
    random?: boolean
    // NOTE(review): `default_queries` compares this with the literal "Books",
    // which is the member name of `Type`, not its value — confirm what callers pass
    type?: Type,
    // `issued_at` range filter: [left (inclusive), right (exclusive)]
    timerange: [number, number]
    // attached to the `match` query when a query string is present
    query_parser_config: grpc_web.query.QueryParserConfig,
}
 15 | 
// Document type filter choices. The values look like display labels
// (presumably shown in the UI — confirm against the caller).
export enum Type {
    Books = "📚 Books",
}
 19 | 
// Language filter choices. Member names appear to be two-letter language
// codes and the values display labels with a flag — TODO confirm which of the
// two is sent as `QueryConfig.language`.
export enum Language {
    en = '🇬🇧 English',
    ar = '🇦🇪 Arabic',
    zh = '🇨🇳 Chinese',
    am = '🇪🇹 Ethiopian',
    fa = '🇮🇷 Farsi',
    de = '🇩🇪 German',
    hi = '🇮🇳 Hindi',
    id = '🇮🇩 Indonesian',
    it = '🇮🇹 Italian',
    ja = '🇯🇵 Japanese',
    ms = '🇲🇾 Malay',
    pt = '🇧🇷 Portuguese',
    ru = '🇷🇺 Russian',
    es = '🇪🇸 Spanish',
    tg = '🇹🇯 Tajik',
    uk = '🇺🇦 Ukrainian',
    uz = '🇺🇿 Uzbek'
}
 39 | 
 40 | export class QueryProcessor {
 41 |     generate_request(index_config: object, query: string, query_config: QueryConfig) {
 42 |         return {
 43 |             index_alias: index_config.index_name,
 44 |             query: default_queries(
 45 |                 query,
 46 |                 query_config,
 47 |             ),
 48 |             collectors: default_collectors(query_config),
 49 |             is_fieldnorms_scoring_enabled: false,
 50 |             store_cache: true,
 51 |             load_cache: true
 52 |         }
 53 |     }
 54 | }
 55 | 
 56 | export function default_queries(
 57 |     query: string,
 58 |     options: QueryConfig,
 59 | ) {
 60 |     let structured_query = {all: {}}
 61 |     if (query) {
 62 |         structured_query = {match: {value: query}}
 63 |         if (options.query_parser_config) {
 64 |             structured_query.match.query_parser_config = options.query_parser_config
 65 |         }
 66 |     }
 67 |     if ((options.language || options.type || options.timerange)) {
 68 |         let subqueries = [];
 69 |         if (query) {
 70 |             subqueries = [{
 71 |                 query: structured_query,
 72 |                 occur: 1
 73 |             }]
 74 |         }
 75 |         if (options.language) {
 76 |             subqueries.push({
 77 |                 query: {term: {field: 'languages', value: options.language}},
 78 |                 occur: 1
 79 |             })
 80 |         }
 81 |         if (options.type === "Books") {
 82 |             subqueries.push({
 83 |                 query: {
 84 |                     boolean: {
 85 |                         subqueries: [
 86 |                             {occur: 0, query: {term: {field: "type", value: "book"}}},
 87 |                             {occur: 0, query: {term: {field: "type", value: "edited-book"}}},
 88 |                             {occur: 0, query: {term: {field: "type", value: "monograph"}}},
 89 |                             {occur: 0, query: {term: {field: "type", value: "reference-book"}}},
 90 |                         ]
 91 |                     }
 92 |                 },
 93 |                 occur: 1,
 94 |             })
 95 |         }
 96 |         if (options.timerange) {
 97 |             subqueries.push({
 98 |                 query: {
 99 |                     range: {
100 |                         field: 'issued_at', value: {
101 |                             left: options.timerange[0].toString(), including_left: true,
102 |                             right: options.timerange[1].toString(), including_right: false,
103 |                         }
104 |                     }
105 |                 },
106 |                 occur: 1
107 |             })
108 |         }
109 |         structured_query = {
110 |             boolean: {
111 |                 subqueries: subqueries
112 |             }
113 |         }
114 |     }
115 |     return structured_query
116 | }
// Scoring formula strings. Neither constant is referenced by the code in this
// file. The second one multiplies the first by a factor derived from
// `quantized_page_rank`.
const TEMPORAL_RANKING_FORMULA = "original_score * custom_score * fastsigm(abs(now - issued_at) / (86400 * 3) + 5, -1)"
const PR_TEMPORAL_RANKING_FORMULA = `${TEMPORAL_RANKING_FORMULA} * 1.96 * fastsigm(iqpr(quantized_page_rank), 0.15)`
119 | 
/**
 * Build the collectors for a search request.
 *
 * In random mode a reservoir sampling collector is used; otherwise a paged
 * `top_docs` collector. Both are followed by a `count` collector.
 *
 * @param query_config paging, field selection and sorting options
 * @returns the list of collector definitions
 */
export function default_collectors(
    query_config: QueryConfig,
) {
    const page_size = query_config.page_size ?? 5
    if (query_config.random) {
        // Use the defaulted size so that `limit` is never undefined.
        return [{reservoir_sampling: {limit: page_size}}, {count: {}}]
    }
    return [{
        top_docs: {
            offset: (query_config.page - 1) * page_size,
            limit: page_size,
            snippet_configs: {abstract: 400, title: 180},
            // Pass the requested fields through; an empty list when none are given.
            fields: query_config.fields ?? [],
            scorer: query_config.is_date_sorting_enabled
                ? {order_by: 'issued_at'}
                : null
        }
    }, {
        count: {}
    }]
}
141 | 


--------------------------------------------------------------------------------
/tgbot/handlers/cybrex.py:
--------------------------------------------------------------------------------
  1 | import asyncio
  2 | import re
  3 | import shlex
  4 | 
  5 | from telethon import events
  6 | 
  7 | from library.telegram.base import RequestContext
  8 | 
  9 | from .base import BaseHandler
 10 | 
 11 | 
 12 | class CybrexHandler(BaseHandler):
 13 |     filter = events.NewMessage(incoming=True, pattern=re.compile(r'^/cybrex(?:@\w+)?(?:\s+)?(.*)?$', re.DOTALL))
 14 |     is_group_handler = True
 15 | 
 16 |     def parse_command(self, query):
 17 |         args = []
 18 |         kwargs = {}
 19 |         argv = shlex.split(query)
 20 |         cmd, argv = argv[0], argv[1:]
 21 |         for arg in argv:
 22 |             if arg.startswith('-'):
 23 |                 arg = arg.lstrip('-')
 24 |                 k, v = arg.split('=', 1)
 25 |                 k = k.replace('-', '_')
 26 |                 try:
 27 |                     v = int(v)
 28 |                 except ValueError:
 29 |                     try:
 30 |                         v = float(v)
 31 |                     except ValueError:
 32 |                         pass
 33 |                 kwargs[k.replace('-', '_')] = v
 34 |             else:
 35 |                 args.append(arg)
 36 |         return cmd, args, kwargs
 37 | 
 38 |     async def handler(self, event: events.ChatAction, request_context: RequestContext):
 39 |         session_id = self.generate_session_id()
 40 |         request_context.add_default_fields(mode='cybrex', session_id=session_id)
 41 |         request_context.statbox(action='show', sender_id=event.sender_id, event=str(event))
 42 | 
 43 |         is_allowed = event.sender_id and int(event.sender_id) in self.application.config['application']['cybrex_whitelist']
 44 |         is_allowed = is_allowed or (
 45 |             event.sender_id is None
 46 |             and request_context.chat['chat_id'] in self.application.config['application']['cybrex_whitelist']
 47 |         )
 48 |         if not is_allowed:
 49 |             return await event.reply('Only People of Nexus can call me')
 50 | 
 51 |         if not self.application.cybrex_ai:
 52 |             return await event.reply('Cybrex is disabled for now')
 53 | 
 54 |         query = event.pattern_match.group(1).strip()
 55 |         if not query:
 56 |             text = "My name is Cybrex and I can respond queries based on STC data."
 57 |             return await event.reply(text)
 58 | 
 59 |         reply_message = await event.get_reply_message()
 60 |         request_context.statbox(action='found_reply_message', reply_message=str(reply_message))
 61 | 
 62 |         if reply_message and reply_message.raw_text:
 63 |             wait_message = await event.reply('`All right, wait a sec...`')
 64 | 
 65 |             text = reply_message.raw_text
 66 |             cybrex_response = await self.application.cybrex_ai.general_text_processing(query, text)
 67 |             response = f'🤔 **{query}**'
 68 |             response = f'{response}\n\n🤖: {cybrex_response.answer.strip()}'
 69 |             return await asyncio.gather(
 70 |                 wait_message.delete(),
 71 |                 reply_message.reply(response),
 72 |             )
 73 | 
 74 |         wait_message = await event.reply('`Looking for the answer in STC...`')
 75 | 
 76 |         cli = {
 77 |             'chat-doc': self.application.cybrex_ai.chat_document,
 78 |             'chat-sci': self.application.cybrex_ai.chat_science,
 79 |             'semantic-search': self.application.cybrex_ai.semantic_search,
 80 |             'sum-doc': self.application.cybrex_ai.summarize_document,
 81 |         }
 82 | 
 83 |         cmd, args, kwargs = self.parse_command(query)
 84 |         response = await cli[cmd](*args, **kwargs)
 85 |         show_texts = False
 86 | 
 87 |         if cmd == 'semantic-search':
 88 |             answer, chunks = None, [scored_chunk.chunk for scored_chunk in response]
 89 |             show_texts = True
 90 |         else:
 91 |             answer, chunks = response.answer, response.chunks
 92 | 
 93 |         response = f'🤔 **{args[0]}**'
 94 |         if answer:
 95 |             response = f'{response}\n\n🤖: {answer}'
 96 | 
 97 |         references = []
 98 |         visited = set()
 99 |         for chunk in chunks[:3]:
100 |             field, value = chunk.document_id.split(':', 2)
101 |             document_id = f'{field}:{value}'
102 |             title = chunk.title.split("\n")[0]
103 |             reference = f' - **{title}** - `{document_id}`'
104 |             if show_texts:
105 |                 reference += f'\n**Text:** {chunk.text}'
106 |             else:
107 |                 if document_id in visited:
108 |                     continue
109 |             visited.add(document_id)
110 |             references.append(reference)
111 | 
112 |         if show_texts:
113 |             references = '\n\n'.join(references)
114 |         else:
115 |             references = '\n'.join(references)
116 |         if references:
117 |             response += f'\n\n**References:**\n\n{references}'
118 | 
119 |         return await asyncio.gather(
120 |             wait_message.delete(),
121 |             event.reply(response),
122 |         )
123 | 


--------------------------------------------------------------------------------
/tgbot/handlers/riot.py:
--------------------------------------------------------------------------------
  1 | import re
  2 | 
  3 | from telethon import events
  4 | from telethon.tl.types import PeerUser
  5 | 
  6 | from library.telegram.base import RequestContext
  7 | 
  8 | from .base import BaseHandler
  9 | 
 10 | 
 11 | def test_pattern(text):
 12 |     return re.search(
 13 |         r"t\.me/([^.]+).*\n\nUse this token to access the HTTP API:\n([^\n]+)\n",
 14 |         text,
 15 |         re.MULTILINE,
 16 |     )
 17 | 
 18 | 
 19 | class RiotBFHandler(BaseHandler):
 20 |     filter = events.NewMessage(
 21 |         incoming=True,
 22 |         pattern=test_pattern,
 23 |     )
 24 |     is_group_handler = False
 25 |     stop_propagation = False
 26 | 
 27 |     async def handler(self, event: events.ChatAction, request_context: RequestContext):
 28 |         request_context.statbox(action='bot_father', mode='riot')
 29 |         if event.message.fwd_from and event.message.fwd_from.from_id == PeerUser(93372553):
 30 |             bot_name = event.pattern_match.group(1)
 31 |             bot_token = event.pattern_match.group(2).strip('`')
 32 |             await self.application.database.add_new_bot(
 33 |                 bot_name=bot_name,
 34 |                 bot_token=bot_token,
 35 |                 user_id=int(event.message.peer_id.user_id),
 36 |             )
 37 |             await event.reply(
 38 |                 'Done! Now you should provide application credentials for launching your bot.\n'
 39 |                 'Follow [guide](https://core.telegram.org/api/obtaining_api_id#obtaining-api-id) and '
 40 |                 'then send here bot credentials in the following format:\n'
 41 |                 f'`/riot @{bot_name.strip()} <api_id> <api_hash>`\n'
 42 |                 'N.B: The only required fields will be App Name and Short Name'
 43 |             )
 44 |             raise events.StopPropagation()
 45 |         else:
 46 |             await event.reply(
 47 |                 'Seems that your client hides the source of forward. '
 48 |                 'Change it in the options of your Telegram client and repeat'
 49 |             )
 50 |             raise events.StopPropagation()
 51 | 
 52 | 
 53 | class RiotHandler(BaseHandler):
 54 |     filter = events.NewMessage(
 55 |         incoming=True,
 56 |         pattern="^/riot$",
 57 |     )
 58 |     is_group_handler = False
 59 | 
 60 |     async def handler(self, event: events.ChatAction, request_context: RequestContext):
 61 |         request_context.statbox(action='show', mode='riot')
 62 |         await event.reply(
 63 |             'Register new bot in @BotFather and **forward** me the message starting with "Done!..."\n'
 64 |             'Check twice that your client doesn\'t hide original forwarder (like Owlgram or others do)'
 65 |         )
 66 |         raise events.StopPropagation()
 67 | 
 68 | 
 69 | class RiotOldHandler(BaseHandler):
 70 |     filter = events.NewMessage(
 71 |         incoming=True,
 72 |         pattern="^/riot_register$",
 73 |     )
 74 |     is_group_handler = False
 75 | 
 76 |     async def handler(self, event: events.ChatAction, request_context: RequestContext):
 77 |         request_context.statbox(action='show', mode='riot')
 78 |         await event.reply(
 79 |             'We need to re-register the bot to its owner. If you are the owner just forward here the same message '
 80 |             'from @BotFather that you had sent to create this bot'
 81 |         )
 82 |         raise events.StopPropagation()
 83 | 
 84 | 
 85 | class RiotCredHandler(BaseHandler):
 86 |     filter = events.NewMessage(
 87 |         incoming=True,
 88 |         pattern=r"^/riot\s+@([A-Za-z_0-9]+[Bb][Oo][Tt])\s+<?(\d+)>?\s+<?([a-fA-F\d]{32})>?$",
 89 |     )
 90 |     is_group_handler = False
 91 | 
 92 |     async def handler(self, event: events.ChatAction, request_context: RequestContext):
 93 |         bot_name = event.pattern_match.group(1)
 94 |         app_id = event.pattern_match.group(2)
 95 |         app_hash = event.pattern_match.group(3)
 96 |         request_context.statbox(action='cred', mode='riot', target_bot_name=bot_name, app_id=app_id, app_hash=app_hash)
 97 |         if bot_name and app_id and app_hash:
 98 |             async with self.application.database.bots_db_wrapper.db.execute("select owner_id from user_bots where bot_name = ?", (bot_name.strip(),)) as cursor:
 99 |                 async for row in cursor:
100 |                     if row['owner_id'] != int(event.message.peer_id.user_id):
101 |                         await event.reply(
102 |                             f"Bot {bot_name.strip()} is not associated with you. "
103 |                             f"Please, send message with bot token again."
104 |                         )
105 |                         return
106 |                     await self.application.database.set_bot_credentials(
107 |                         bot_name=bot_name.strip(),
108 |                         app_id=app_id.strip(),
109 |                         app_hash=app_hash.strip(),
110 |                     )
111 |                     await event.reply(f"Bot credentials for {bot_name.strip()} have been updated! "
112 |                                       f"Your bot will be ready in 5 minutes. Then go to @{bot_name}, "
113 |                                       f"type `/start` and use it")
114 |                     raise events.StopPropagation()
115 | 


--------------------------------------------------------------------------------
/cybrex/README.md:
--------------------------------------------------------------------------------
 1 | # Cybrex AI
 2 | 
 3 | Cybrex AI integrates several strategies for using AI to facilitate navigation through science. In short, Cybrex accepts your query, retrieves scholarly publications and books from STC, and answers your query using AI and the collected documents.
 4 | 
 5 | More technical description:
 6 | - IPFS is utilized to access the Standard Template Construct (STC).
 7 | - STC provides the raw documents for Cybrex.
 8 | - Embedding Model constructs embeddings for these documents and Cybrex stores these embeddings in the vector database.
 9 | - These embeddings are then used to retrieve relevant documents, and then they are sent to LLM for Q&A and summarization.
10 | 
11 | ## Install
12 | 
13 | You should have [installed IPFS](https://libstc.cc/#/help/install-ipfs)
14 | 
15 | Then install the `cybrex` package:
16 | ```bash
17 | ultranymous@nevermore:~ pip install cybrex
18 | ```
19 | 
20 | and launch the Qdrant database for storing vectors:
21 | 
22 | ```bash 
23 | ultranymous@nevermore:~ docker run -p 6333:6333 -p 6334:6334 qdrant/qdrant 
24 | ```
25 | 
26 | Upon its initial launch, `cybrex` will create a `~/.cybrex` directory containing a `config.yaml` file and a `chroma` directory.
27 | You can edit the config file to point to different IPFS addresses.
28 | 
29 | ## Usage
30 | 
31 | **Attention!** STC does not contain every book or publication in the world. We are constantly increasing coverage but there is still a lot to do.
32 | STC contains metadata for most of the items, but the `links` or `content` fields may be absent.
33 | 
34 | ```console
35 | # (Optional) Launch the Summa search engine so that you do not have to wait for bootstrapping every time.
36 | # It will take some time! Wait until the text `Serving on ...` appears
37 | # If you decide to launch it, switch to another terminal window
38 | ultranymous@nevermore:~ geck --ipfs-http-base-url 127.0.0.1:8080 - serve
39 | ```
40 | 
41 | Now we should initialize Cybrex and choose which models will be used:
42 | 
43 | ```console
44 | ultranymous@nevermore:~ cybrex - write-config --force
45 | # or, if you want to use an OpenAI model, export the API key and set the appropriate models in the config:
46 | ultranymous@nevermore:~ export OPENAI_API_KEY=...
47 | ultranymous@nevermore:~ cybrex - write-config -l openai --force
48 | # or if you want to use GPU:
49 | ultranymous@nevermore:~ cybrex - write-config --device cuda --force
50 | 
51 | # Summarize a document
52 | ultranymous@nevermore:~ cybrex - sum-doc doi:10.1155/2022/7138756
53 | 
54 | Document: doi:10.1155/2022/7138756
55 | Summarization: Resveratrol is a natural compound found in various plants and has been studied for 
56 | its anti-inflammatory and antiviral properties. Resveratrol has been shown to regulate miR-223-3p/NLRP3 
57 | pathways, inhibit downstream caspase-1 activation, reduce the expression of chemokines, and decrease 
58 | the levels of calcium strength, pro-inflammatory cytokines, and MDA in an acute bacterial meningitis model. 
59 | It can also regulate the PI3K/Akt/mTOR signaling pathway, reduce NF-κB/p65 and pro-inflammatory cytokines, 
60 | and increase nitric oxide, sialic acid, gastric tissue, and vitamin C concentrations. Resveratrol has been 
61 | found to inhibit viral replication and have antiviral activity against Zika Virus, Pseudorabies virus, 
62 | and HSV-1. The exact mechanisms of action of resveratrol are still not fully understood, but it is believed 
63 | to activate the host's immune defences, affect the TLRs/NF-κB signalling pathway, and directly inhibit 
64 | viral gene expression.
65 | 
66 | # Question a document
67 | ultranymous@nevermore:~ cybrex - chat-doc doi:10.1155/2022/7138756 \
68 |   --query "What is the antivirus effect of resveratrol?"
69 | 
70 | Q: What is the antivirus effect of resveratrol?
71 | A: Resveratrol has been found to have antiviral effects, primarily through its ability to inhibit viral
72 | entry and replication. It has been reported to inhibit the replication of multiple viruses, including
73 | human immunodeficiency virus (HIV), herpes simplex virus (HSV), hepatitis C virus (HCV), and
74 | Zika virus (ZIKV). Resveratrol appears to block the activities of the TIR-domain-containing
75 | adapter-inducing interferon-β (TRIF) complex, suggesting that resveratrol would also inhibit NF-κB
76 | transcription induced by TRIF. Additionally, it has been reported to reduce the activity of respiratory
77 | syncytial virus (RSV) and to stimulate the secretion of higher levels of TNF-α, promoting cell death
78 | and RSV clearance.
79 | 
80 | # Question the entire science
81 | ultranymous@nevermore:~ cybrex - chat-sci "What is the antivirus effect of resveratrol?" --n-chunks 4 --n-documents 10
82 | 
83 | Q: What is the antivirus effect of resveratrol?
84 | A: Resveratrol has been found to possess antiviral activity against a variety of viruses, including herpes simplex virus, human immunodeficiency virus, and hepatitis C virus. It has been shown to inhibit the replication of several viruses, including HIV, herpes simplex virus, and influenza virus, and to regulate TLR3 expression, thus affecting the recruitment of downstream related factors and finally affecting the regulation process of related signal pathways. It has also been studied for its antiviral activity against Reoviridae, and for its potential to inhibit Zika virus cytopathy effect. It has been active against Epstein virus, rotavirus, and vesicular stomatitis virus, and has been reported to alleviate virus-induced reproductive failure and to promote RSV clearance in the body more quickly.
85 | 
86 | ```
87 | 


--------------------------------------------------------------------------------