├── tgbot
├── app
│ ├── __init__.py
│ ├── aiosqlite_wrapper.py
│ └── exceptions.py
├── views
│ ├── __init__.py
│ └── telegram
│ │ ├── __init__.py
│ │ ├── document_list_widget.py
│ │ ├── common.py
│ │ └── progress_bar.py
├── widgets
│ └── __init__.py
├── configs
│ ├── development.yaml
│ ├── __init__.py
│ ├── logging.yaml
│ └── base.yaml
├── .gitignore
├── promotions
│ ├── __init__.py
│ └── promotions.yaml
├── handlers
│ ├── stop.py
│ ├── noop.py
│ ├── aboutus.py
│ ├── librarian.py
│ ├── help.py
│ ├── howtohelp.py
│ ├── shortlink.py
│ ├── close.py
│ ├── report.py
│ ├── roll.py
│ ├── q.py
│ ├── view.py
│ ├── start.py
│ ├── submit.py
│ ├── vote.py
│ ├── mlt.py
│ ├── cybrex.py
│ └── riot.py
├── translations
│ └── __init__.py
├── requirements.txt
├── Dockerfile
├── main.py
├── README.md
└── markdownifytg.py
├── web
├── .prettierrc.json
├── .eslintignore
├── service-worker.js
├── public
│ ├── favicon.ico
│ ├── favicon-dark.png
│ ├── mstile-70x70.png
│ ├── default-cover.jpg
│ ├── favicon-16x16.png
│ ├── favicon-32x32.png
│ ├── favicon-light.png
│ ├── mstile-144x144.png
│ ├── mstile-150x150.png
│ ├── mstile-310x150.png
│ ├── mstile-310x310.png
│ ├── apple-touch-icon.png
│ ├── android-chrome-192x192.png
│ ├── android-chrome-512x512.png
│ ├── android-chrome-maskable-192x192.png
│ ├── android-chrome-maskable-512x512.png
│ ├── browserconfig.xml
│ ├── sitemap.xml
│ ├── site.webmanifest
│ ├── favicon-black.svg
│ ├── favicon.svg
│ └── safari-pinned-tab.svg
├── src
│ ├── services
│ │ ├── search
│ │ │ ├── index.ts
│ │ │ ├── search-service.ts
│ │ │ └── query-processor.ts
│ │ ├── index.ts
│ │ └── user-service.ts
│ ├── assets
│ │ └── origin.jpg
│ ├── views
│ │ ├── HowToSearchView.vue
│ │ ├── InstallIpfsView.vue
│ │ ├── DoomsdayView.vue
│ │ ├── IntroView.vue
│ │ ├── StcHubApiView.vue
│ │ ├── StcBoxView.vue
│ │ ├── DonateView.vue
│ │ ├── DocumentView.vue
│ │ ├── Reader.vue
│ │ └── BookmarksView.vue
│ ├── components
│ │ ├── TagsList.vue
│ │ ├── LoadingSpinner.vue
│ │ ├── QrCode.vue
│ │ ├── ReferencesList.vue
│ │ ├── SearchList.vue
│ │ ├── ConnectivityIssues.vue
│ │ ├── DocumentButtons.vue
│ │ ├── DjvuReader.vue
│ │ ├── EpubReader.vue
│ │ ├── DocumentSnippet.vue
│ │ ├── download-progress.ts
│ │ └── PdfReader.vue
│ ├── main.ts
│ ├── App.vue
│ ├── database.ts
│ ├── router
│ │ └── index.ts
│ ├── utils.ts
│ └── scss
│ │ └── styles.scss
├── summa-config.json
├── tsconfig.config.json
├── .gitignore
├── env.d.ts
├── vite-sw.config.ts
├── publi.sh
├── tsconfig.json
├── README.md
├── index.html
├── .eslintrc.js
├── vite.config.ts
└── package.json
├── cybrex
├── cybrex
│ ├── __init__.py
│ ├── chains
│ │ ├── base.py
│ │ ├── __init__.py
│ │ └── map_reduce.py
│ ├── prompts
│ │ └── __init__.py
│ ├── vector_storage
│ │ ├── __init__.py
│ │ └── base.py
│ ├── exceptions.py
│ ├── data_source
│ │ ├── base.py
│ │ └── geck_data_source.py
│ ├── utils.py
│ └── llm_manager.py
├── MANIFEST.in
├── .gitignore
├── .isort.cfg
├── .flake8
├── requirements.txt
├── pyproject.toml
├── examples
│ ├── on-the-fly-translation.ipynb
│ └── analyse-references.ipynb
└── README.md
├── geck
├── stc_geck
│ ├── __init__.py
│ ├── exceptions.py
│ └── utils.py
├── MANIFEST.in
├── .gitignore
├── .isort.cfg
├── .flake8
├── requirements.txt
└── pyproject.toml
├── library
├── sciparse
│ ├── __init__.py
│ ├── models
│ │ ├── .gitignore
│ │ └── lid.176.ftz
│ ├── exceptions.py
│ ├── language_detect.py
│ └── cli.py
├── telegram
│ ├── README.md
│ ├── __init__.py
│ ├── session_backend
│ │ ├── __init__.py
│ │ └── core_postgres.py
│ ├── common.py
│ ├── promotioner.py
│ └── utils.py
├── .gitignore
├── user_manager
│ ├── __init__.py
│ └── user_manager.py
└── textutils
│ ├── __init__.py
│ ├── html_processing.py
│ └── utils.py
├── .flake8
├── .gitignore
├── .isort.cfg
├── .env.light
└── docker-compose.light.yml
/tgbot/app/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tgbot/views/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/web/.prettierrc.json:
--------------------------------------------------------------------------------
1 | {}
--------------------------------------------------------------------------------
/cybrex/cybrex/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/cybrex/cybrex/chains/base.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/geck/stc_geck/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/library/sciparse/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/library/telegram/README.md:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/library/telegram/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tgbot/widgets/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/cybrex/cybrex/chains/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/cybrex/cybrex/prompts/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tgbot/views/telegram/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tgbot/configs/development.yaml:
--------------------------------------------------------------------------------
1 | ---
2 |
--------------------------------------------------------------------------------
/cybrex/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include requirements.txt
--------------------------------------------------------------------------------
/cybrex/cybrex/vector_storage/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/geck/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include requirements.txt
--------------------------------------------------------------------------------
/library/sciparse/models/.gitignore:
--------------------------------------------------------------------------------
1 | lid.176.bin
--------------------------------------------------------------------------------
/cybrex/.gitignore:
--------------------------------------------------------------------------------
1 | *.egg-info
2 | __pycache__
3 | dist
--------------------------------------------------------------------------------
/geck/.gitignore:
--------------------------------------------------------------------------------
1 | *.egg-info
2 | __pycache__
3 | dist
--------------------------------------------------------------------------------
/tgbot/.gitignore:
--------------------------------------------------------------------------------
1 | bots.db
2 | configs/production.yaml
--------------------------------------------------------------------------------
/.flake8:
--------------------------------------------------------------------------------
1 | [flake8]
2 | exclude = venv/*
3 | max-line-length = 160
--------------------------------------------------------------------------------
/library/.gitignore:
--------------------------------------------------------------------------------
1 | actions
2 | integral
3 | pdftools
4 | siteparsers
--------------------------------------------------------------------------------
/web/.eslintignore:
--------------------------------------------------------------------------------
1 | .eslintrc.js
2 | public/*
3 | service-worker.js
--------------------------------------------------------------------------------
/web/service-worker.js:
--------------------------------------------------------------------------------
1 | node_modules/summa-wasm/dist/service-worker.js
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .env
2 | fabrica
3 | infra
4 | venv
5 | docker-compose.yml
6 | __pycache__
7 |
--------------------------------------------------------------------------------
/web/public/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nexus-stc/stc/HEAD/web/public/favicon.ico
--------------------------------------------------------------------------------
/web/src/services/search/index.ts:
--------------------------------------------------------------------------------
1 | export {IpfsSearchService} from './ipfs-search-service'
2 |
--------------------------------------------------------------------------------
/web/public/favicon-dark.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nexus-stc/stc/HEAD/web/public/favicon-dark.png
--------------------------------------------------------------------------------
/web/public/mstile-70x70.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nexus-stc/stc/HEAD/web/public/mstile-70x70.png
--------------------------------------------------------------------------------
/web/src/assets/origin.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nexus-stc/stc/HEAD/web/src/assets/origin.jpg
--------------------------------------------------------------------------------
/web/public/default-cover.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nexus-stc/stc/HEAD/web/public/default-cover.jpg
--------------------------------------------------------------------------------
/web/public/favicon-16x16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nexus-stc/stc/HEAD/web/public/favicon-16x16.png
--------------------------------------------------------------------------------
/web/public/favicon-32x32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nexus-stc/stc/HEAD/web/public/favicon-32x32.png
--------------------------------------------------------------------------------
/web/public/favicon-light.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nexus-stc/stc/HEAD/web/public/favicon-light.png
--------------------------------------------------------------------------------
/web/public/mstile-144x144.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nexus-stc/stc/HEAD/web/public/mstile-144x144.png
--------------------------------------------------------------------------------
/web/public/mstile-150x150.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nexus-stc/stc/HEAD/web/public/mstile-150x150.png
--------------------------------------------------------------------------------
/web/public/mstile-310x150.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nexus-stc/stc/HEAD/web/public/mstile-310x150.png
--------------------------------------------------------------------------------
/web/public/mstile-310x310.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nexus-stc/stc/HEAD/web/public/mstile-310x310.png
--------------------------------------------------------------------------------
/library/user_manager/__init__.py:
--------------------------------------------------------------------------------
1 | from .user_manager import UserManager
2 |
3 | __all__ = ['UserManager']
4 |
--------------------------------------------------------------------------------
/web/public/apple-touch-icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nexus-stc/stc/HEAD/web/public/apple-touch-icon.png
--------------------------------------------------------------------------------
/library/sciparse/models/lid.176.ftz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nexus-stc/stc/HEAD/library/sciparse/models/lid.176.ftz
--------------------------------------------------------------------------------
/web/public/android-chrome-192x192.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nexus-stc/stc/HEAD/web/public/android-chrome-192x192.png
--------------------------------------------------------------------------------
/web/public/android-chrome-512x512.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nexus-stc/stc/HEAD/web/public/android-chrome-512x512.png
--------------------------------------------------------------------------------
/web/src/services/index.ts:
--------------------------------------------------------------------------------
1 | export { IpfsSearchService } from './search'
2 | export { UserService } from './user-service'
3 |
--------------------------------------------------------------------------------
/geck/.isort.cfg:
--------------------------------------------------------------------------------
1 | [settings]
2 | skip_glob=**/venv/**
3 | include_trailing_comma=True
4 | multi_line_output=3
5 | force_grid_wrap=2
6 |
--------------------------------------------------------------------------------
/cybrex/.isort.cfg:
--------------------------------------------------------------------------------
1 | [settings]
2 | skip_glob=**/venv/**
3 | include_trailing_comma=True
4 | multi_line_output=3
5 | force_grid_wrap=2
6 |
--------------------------------------------------------------------------------
/.isort.cfg:
--------------------------------------------------------------------------------
1 | [settings]
2 | skip_glob=**/venv/**
3 | include_trailing_comma=True
4 | multi_line_output=3
5 | force_grid_wrap=2
6 | line_length=120
--------------------------------------------------------------------------------
/library/sciparse/exceptions.py:
--------------------------------------------------------------------------------
1 | from aiobaseclient.exceptions import BadRequestError
2 |
3 | __all__ = [
4 | 'BadRequestError',
5 | ]
6 |
--------------------------------------------------------------------------------
/library/telegram/session_backend/__init__.py:
--------------------------------------------------------------------------------
1 | from .sqlalchemy import AlchemySessionContainer
2 |
3 | __all__ = ['AlchemySessionContainer']
4 |
--------------------------------------------------------------------------------
/web/public/android-chrome-maskable-192x192.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nexus-stc/stc/HEAD/web/public/android-chrome-maskable-192x192.png
--------------------------------------------------------------------------------
/web/public/android-chrome-maskable-512x512.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nexus-stc/stc/HEAD/web/public/android-chrome-maskable-512x512.png
--------------------------------------------------------------------------------
/cybrex/cybrex/exceptions.py:
--------------------------------------------------------------------------------
1 | from izihawa_utils.exceptions import BaseError
2 |
3 |
class QdrantStorageNotAvailableError(BaseError):
    """Error type for an unavailable Qdrant storage.

    The meaning is taken from the class name; the places that raise it are
    outside this file. Adds nothing on top of ``BaseError``.
    """
6 |
--------------------------------------------------------------------------------
/cybrex/.flake8:
--------------------------------------------------------------------------------
1 | [flake8]
2 | exclude =
3 | .git,
4 | __pycache__,
5 | venv,
6 | build,
7 | dist,
8 | ignore = I, W503
9 | max-line-length = 140
10 |
--------------------------------------------------------------------------------
/geck/.flake8:
--------------------------------------------------------------------------------
1 | [flake8]
2 | exclude =
3 | .git,
4 | __pycache__,
5 | venv,
6 | build,
7 | dist,
8 | ignore = I, W503
9 | max-line-length = 140
10 |
--------------------------------------------------------------------------------
/web/summa-config.json:
--------------------------------------------------------------------------------
1 | {
2 | "index": "bafyb4iadbza7ckc3djc2k5lfaorwaufcjurzxzkjsj5e7qt2wrguqs7ywm",
3 | "ipfs_api_multiaddr": "/ip4/10.1.2.3/tcp/5001",
4 | "ipfs_http_base_url": "http://10.1.2.3:8080"
5 | }
6 |
--------------------------------------------------------------------------------
/tgbot/promotions/__init__.py:
--------------------------------------------------------------------------------
1 | from izihawa_configurator import Configurator
2 |
3 |
def get_promotions():
    """Return the ``promotions`` entry of the promotions configuration.

    A ``Configurator`` is built over ``tgbot/promotions/promotions.yaml`` and
    the value stored under its ``'promotions'`` key is returned.
    """
    # The path is relative — presumably resolved against the process working directory; confirm.
    config_paths = ['tgbot/promotions/promotions.yaml']
    loaded = Configurator(config_paths)
    return loaded['promotions']


# Evaluated once, at import time: importing this module runs the loader above.
promotions = get_promotions()
9 |
--------------------------------------------------------------------------------
/web/src/services/user-service.ts:
--------------------------------------------------------------------------------
export class UserService {
  // Items passed to `like`, in call order; starts out empty.
  liked_items: string[] = []

  /** Append `item` to `liked_items`. Duplicates are not filtered out. */
  like (item: string): void {
    this.liked_items.push(item)
  }
}
12 |
--------------------------------------------------------------------------------
/geck/requirements.txt:
--------------------------------------------------------------------------------
1 | aiohttp>=3.8.5
2 | aiokit>=1.2.3
3 | aiosumma>=2.47.1
4 | humanfriendly>=10.0
5 | ipfs-hamt-directory-py>=0.1.1
6 | izihawa-ipfs-api>=1.0.7
7 | izihawa-utils>=1.1.3
8 | multidict>=6.0.4
9 | summa-embed>=0.20.2
10 | termcolor>=2.3.0
11 | fire>=0.5.0
--------------------------------------------------------------------------------
/web/public/browserconfig.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | #ffc40d
7 |
8 |
9 |
10 |
--------------------------------------------------------------------------------
/cybrex/cybrex/vector_storage/base.py:
--------------------------------------------------------------------------------
1 | from typing import (
2 | Iterable,
3 | List,
4 | Optional,
5 | Tuple,
6 | )
7 |
8 |
class BaseVectorStorage:
    """Base class for vector storages; ``query`` is left unimplemented here."""

    def query(
        self,
        query_embedding: List[float],
        n_chunks: int,
        field_values: Optional[Iterable[Tuple[str, str]]] = None,
    ):
        """Placeholder that subclasses are expected to override.

        Args:
            query_embedding: Embedding of the query as a list of floats.
            n_chunks: Integer count (presumably how many chunks to return; confirm in subclasses).
            field_values: Optional iterable of ``(str, str)`` pairs; defaults to ``None``.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError()
12 |
--------------------------------------------------------------------------------
/web/public/sitemap.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | https://libstc.cc/
5 | 2023-04-12
6 |
7 |
8 | https://libstc.cc/#/about
9 | 2023-04-12
10 |
11 |
12 |
--------------------------------------------------------------------------------
/geck/stc_geck/exceptions.py:
--------------------------------------------------------------------------------
1 | from izihawa_utils.exceptions import BaseError
2 |
3 |
class IpfsConnectionError(BaseError):
    """Error type for IPFS connection problems.

    The meaning is taken from the class name; the places that raise it are
    outside this file. Adds nothing on top of ``BaseError``.
    """
6 |
7 |
class ItemNotFound(BaseError):
    """Error that carries the query it was constructed with in ``self.query``."""

    # NOTE(review): `super().__init__()` is not called here — confirm that
    # `BaseError` does not rely on state set up by its own initializer.
    def __init__(self, query):
        self.query = query
11 |
12 |
class CidNotFound(BaseError):
    """Error that carries the query it was constructed with in ``self.query``."""

    # NOTE(review): `super().__init__()` is not called here — confirm that
    # `BaseError` does not rely on state set up by its own initializer.
    def __init__(self, query):
        self.query = query
16 |
--------------------------------------------------------------------------------
/tgbot/configs/__init__.py:
--------------------------------------------------------------------------------
1 | from izihawa_configurator import Configurator
2 | from izihawa_utils import env
3 |
4 |
def get_config():
    """Build the bot's ``Configurator``.

    Three files are passed, in this order: the base config, a file named after
    ``env.type``, and the logging config. ``env_prefix='STC_TGBOT'`` is handed
    to the ``Configurator`` as well.
    """
    config_files = [
        'tgbot/configs/base.yaml',
        # The trailing "?" is part of the path string — presumably it marks the
        # file as optional for izihawa_configurator; confirm against its docs.
        'tgbot/configs/%s.yaml?' % env.type,
        'tgbot/configs/logging.yaml',
    ]
    return Configurator(config_files, env_prefix='STC_TGBOT')


# Evaluated once, at import time: importing this module builds the configuration.
config = get_config()
14 |
--------------------------------------------------------------------------------
/library/telegram/common.py:
--------------------------------------------------------------------------------
1 | from telethon import Button
2 |
3 |
def close_button(session_id: str = None):
    """Create the inline "close" button.

    Args:
        session_id: When truthy, it is appended to the callback data as
            ``/close_<session_id>``; otherwise the data is plain ``/close``.

    Returns:
        The result of ``Button.inline`` with the ``✖️`` label.
    """
    callback_data = f'/close_{session_id}' if session_id else '/close'
    return Button.inline(text='✖️', data=callback_data)
15 |
--------------------------------------------------------------------------------
/web/tsconfig.config.json:
--------------------------------------------------------------------------------
1 | {
2 | "extends": "@tsconfig/node18/tsconfig.json",
3 | "include": [
4 | "summa-config.ts",
5 | "vite.config.ts",
6 | "vite-sw.config.ts",
7 | "vitest.config.ts",
8 | "cypress.config.*"
9 | ],
10 | "compilerOptions": {
11 | "composite": true,
12 | "moduleResolution": "Node",
13 | "resolveJsonModule": true,
14 | "types": ["node"]
15 | }
16 | }
17 |
--------------------------------------------------------------------------------
/tgbot/handlers/stop.py:
--------------------------------------------------------------------------------
1 | from telethon import events
2 |
3 | from library.telegram.base import RequestContext
4 |
5 | from .base import BaseHandler
6 |
7 |
class StopHandler(BaseHandler):
    """Handler bound to incoming messages matching ``^/stop$``."""

    # Incoming messages only; the pattern is anchored at both ends.
    filter = events.NewMessage(incoming=True, pattern='^/stop$')

    # NOTE(review): `event` is annotated as `events.ChatAction`, while the
    # filter above is an `events.NewMessage` — confirm the intended type.
    async def handler(self, event: events.ChatAction, request_context: RequestContext):
        """Record the request via ``request_context.statbox``; nothing else happens here."""
        request_context.statbox(action='show', mode='stop')
13 |
--------------------------------------------------------------------------------
/.env.light:
--------------------------------------------------------------------------------
1 | COMPOSE_PATH_SEPARATOR=:
2 | COMPOSE_FILE=docker-compose.light.yml
3 | COMPOSE_PROJECT_NAME=light
4 |
5 | # Obtain the next two parameters at https://my.telegram.org
6 | STC_TGBOT_application.default_bot.app_id=...
7 | STC_TGBOT_application.default_bot.app_hash=...
8 |
9 | # Register your bot with @BotFather in Telegram
10 | STC_TGBOT_application.default_bot.bot_name=...
11 | STC_TGBOT_application.default_bot.bot_token=...
12 |
--------------------------------------------------------------------------------
/web/.gitignore:
--------------------------------------------------------------------------------
1 | # Logs
2 | logs
3 | *.log
4 | npm-debug.log*
5 | yarn-debug.log*
6 | yarn-error.log*
7 | pnpm-debug.log*
8 | lerna-debug.log*
9 |
10 | node_modules
11 | .DS_Store
12 | dist
13 | dist-ssr
14 | coverage
15 | *.local
16 |
17 | /cypress/videos/
18 | /cypress/screenshots/
19 |
20 | # Editor directories and files
21 | .vscode/*
22 | !.vscode/extensions.json
23 | .idea
24 | *.suo
25 | *.ntvs*
26 | *.njsproj
27 | *.sln
28 | *.sw?
29 |
--------------------------------------------------------------------------------
/cybrex/requirements.txt:
--------------------------------------------------------------------------------
1 | aiokit>=1.2.3
2 | beautifulsoup4>=4.12.2
3 | ctransformers>=0.2.17
4 | FlagEmbedding>=1.1.2
5 | InstructorEmbedding>=1.0.1
6 | izihawa-configurator>=1.0.4
7 | izihawa-utils>=1.1.3
8 | keybert>=0.7.0
9 | langchain>=0.0.222
10 | lazy>=1.5
11 | lxml>=4.9.3
12 | openai>=0.27.8
13 | orjson
14 | pypdf>=3.12.0
15 | pyyaml>=6.0
16 | qdrant_client>=1.5.4
17 | tiktoken>=0.5.1
18 | safetensors==0.3.1
19 | stc-geck>=1.8.35
20 | unstructured[html]>=0.10.28
21 |
--------------------------------------------------------------------------------
/tgbot/handlers/noop.py:
--------------------------------------------------------------------------------
1 | from telethon import events
2 |
3 | from library.telegram.base import RequestContext
4 |
5 | from .base import BaseCallbackQueryHandler
6 |
7 |
class NoopHandler(BaseCallbackQueryHandler):
    """Callback-query handler bound to data matching ``^/noop$``."""

    # The pattern is anchored at both ends.
    filter = events.CallbackQuery(pattern='^/noop$')

    # NOTE(review): `event` is annotated as `events.ChatAction`, while the
    # filter above is an `events.CallbackQuery` — confirm the intended type.
    async def handler(self, event: events.ChatAction, request_context: RequestContext):
        """Record the request via ``request_context.statbox``, then call ``event.answer()`` with no arguments."""
        request_context.statbox(action='start', mode='noop')
        await event.answer()
14 |
--------------------------------------------------------------------------------
/web/env.d.ts:
--------------------------------------------------------------------------------
1 | import 'vite/client'
2 |
3 | import { type SearchService } from '@/services/summa'
4 |
// Augments `ComponentCustomProperties` from '@vue/runtime-core' with a
// `search_service` property typed as `SearchService`.
declare module '@vue/runtime-core' {
  interface ComponentCustomProperties {
    search_service: SearchService
  }
}
10 |
// Declares the shape of `NodeJS.ProcessEnv` in the global scope.
// `PORT` is the only optional entry; `NODE_ENV` is limited to two literals.
declare global {
  namespace NodeJS {
    interface ProcessEnv {
      GITHUB_AUTH_TOKEN: string
      NODE_ENV: 'development' | 'production'
      PORT?: string
      PWD: string
    }
  }
}
21 |
--------------------------------------------------------------------------------
/web/src/views/HowToSearchView.vue:
--------------------------------------------------------------------------------
1 |
2 | .container
3 | h2 {{ get_label('how_to_search') }}
4 | span(v-html="get_label('help_content')")
5 |
6 |
16 |
--------------------------------------------------------------------------------
/web/src/views/InstallIpfsView.vue:
--------------------------------------------------------------------------------
1 |
2 | .container
3 | h3 {{ get_label('install_ipfs') }}
4 | span(v-html="get_label('install_ipfs_content')")
5 |
6 |
17 |
--------------------------------------------------------------------------------
/tgbot/translations/__init__.py:
--------------------------------------------------------------------------------
1 | from izihawa_configurator import Configurator
2 |
3 |
def get_translations():
    """Build a ``Configurator`` over ``tgbot/translations/translations.yaml``."""
    translation_files = ['tgbot/translations/translations.yaml']
    return Configurator(translation_files)
8 |
9 |
def t(label, language='en'):
    """Look up ``label`` in the translations for ``language``.

    Falls back to the ``'en'`` entry when ``language`` is not present in the
    loaded translations or has no such label. A label that is missing from
    ``'en'`` as well surfaces whatever error that final lookup raises.
    """
    language_known = language in _translations
    # Short-circuit keeps the per-language lookup from running for unknown languages.
    if not language_known or label not in _translations[language]:
        return _translations['en'][label]
    return _translations[language][label]
14 |
15 |
16 | _translations = get_translations()
17 |
18 |
19 | __all__ = ['t']
20 |
--------------------------------------------------------------------------------
/web/src/components/TagsList.vue:
--------------------------------------------------------------------------------
1 |
2 | div
3 | span(v-for="(tag, index) in tags")
4 | span.ms-1.me-1(v-if="index > 0") -
5 | router-link.text-decoration-none(:to="'/?q=tags:\"' + tag + '\"'") {{ tag }}
6 |
7 |
8 |
20 |
--------------------------------------------------------------------------------
/tgbot/requirements.txt:
--------------------------------------------------------------------------------
1 | beautifulsoup4
2 | lxml
3 | pandas
4 | fasttext-wheel
5 | PyCryptodome
6 | pypdf>=3.17.0
7 | seaborn
8 |
9 | aiobaseclient
10 | aiocrossref
11 | aiokit>=1.2.3
12 | aiosqlite
13 | aiosumma>=2.47.3
14 | bleach
15 | base36
16 | cybrex[petals]>=1.11.11
17 | dateparser
18 | emoji
19 | isbnlib>=3.10.13
20 | izihawa_configurator>=1.0.4
21 | izihawa_ipfs_api>=1.0.0
22 | izihawa_loglib>=1.0.2
23 | izihawa_utils
24 | lru-dict
25 | markdownify
26 | sqlalchemy
27 | stc-geck>=1.8.38
28 |
29 | telethon==1.30.3
30 |
--------------------------------------------------------------------------------
/tgbot/app/aiosqlite_wrapper.py:
--------------------------------------------------------------------------------
1 | import aiosqlite
2 | from aiokit import AioThing
3 |
4 |
def dict_factory(cursor, row):
    """Turn a result row into a dict keyed by column name.

    Args:
        cursor: Object whose ``description`` is iterated; element ``[0]`` of
            each entry is used as the key.
        row: Indexable row; the value at the same position as the
            description entry becomes the dict value.

    Returns:
        A new dict with one entry per description item (a later duplicate
        column name overwrites an earlier one).
    """
    return {
        column[0]: row[position]
        for position, column in enumerate(cursor.description)
    }
10 |
11 |
class AioSqlite(AioThing):
    """``AioThing`` that holds an ``aiosqlite`` connection in ``self.db``."""

    def __init__(self, db_name):
        super().__init__()
        # `aiosqlite.connect(...)` is not awaited here: until `start()` runs,
        # `self.db` holds its return value rather than an opened connection.
        self.db = aiosqlite.connect(db_name)

    async def start(self):
        """Await the pending connect and install ``dict_factory`` as the row factory."""
        self.db = await self.db
        # Rows are then produced by `dict_factory`, i.e. as dicts keyed by column name.
        self.db.row_factory = dict_factory
        # NOTE(review): no `stop()`/close counterpart is visible in this class —
        # confirm the connection gets closed somewhere.
20 |
--------------------------------------------------------------------------------
/docker-compose.light.yml:
--------------------------------------------------------------------------------
1 | services:
2 | ipfs:
3 | image: ipfs/kubo:latest
4 | environment:
5 | IPFS_PROFILE: server
6 | ports:
7 | - 8080:8080
8 | volumes:
9 | - /Users/pasha/data-ipfs:/data/ipfs
10 | tgbot:
11 | build:
12 | context: .
13 | dockerfile: tgbot/Dockerfile
14 | depends_on:
15 | ipfs:
16 | condition: service_healthy
17 | env_file:
18 | - .env.light
19 | restart: always
20 | volumes:
21 | - /Users/pasha/tmp:/usr/lib/stc-tgbot
22 |
--------------------------------------------------------------------------------
/web/public/site.webmanifest:
--------------------------------------------------------------------------------
1 | {
2 | "name": "",
3 | "short_name": "",
4 | "icons": [
5 | {
6 | "src": "./android-chrome-192x192.png",
7 | "sizes": "192x192",
8 | "type": "image/png"
9 | },
10 | {
11 | "src": "./android-chrome-512x512.png",
12 | "sizes": "512x512",
13 | "type": "image/png"
14 | }
15 | ],
16 | "theme_color": "#ffffff",
17 | "background_color": "#ffffff",
18 | "display": "standalone"
19 | }
20 |
--------------------------------------------------------------------------------
/web/vite-sw.config.ts:
--------------------------------------------------------------------------------
1 | import { defineConfig } from 'vite'
2 |
3 | // https://vitejs.dev/config/
// Separate Vite build whose only entry is the service worker shipped in summa-wasm.
export default defineConfig({
  base: '',
  build: {
    // Keep whatever is already in the output directory (presumably the main
    // build's artifacts — confirm against the package.json scripts).
    emptyOutDir: false,
    rollupOptions: {
      input: {
        'service-worker': './node_modules/summa-wasm/dist/service-worker.js',
      },
      output: [
        {
          // Always yields the '[name].js' template, i.e. no hash in the file name.
          entryFileNames: () => {
            return '[name].js'
          }
        }
      ]
    },
    target: 'esnext'
  }
})
23 |
--------------------------------------------------------------------------------
/web/src/views/DoomsdayView.vue:
--------------------------------------------------------------------------------
1 |
2 | .container
3 | h3 Doomsday Guide
4 | p In the event of an extinction-level occurrence (such as a nuclear war, global pandemic, or impact event), this space will provide links to a rescue kit and literature on the subject of civilization restoration.
5 |
6 |
15 |
--------------------------------------------------------------------------------
/tgbot/Dockerfile:
--------------------------------------------------------------------------------
# Image for the Telegram bot. Paths such as tgbot/requirements.txt imply the
# build context is the repository root, not the tgbot/ directory.

# NOTE(review): MODE is declared but not referenced anywhere in this file.
ARG MODE

FROM python:3.11-slim as builder-common
# Compilers, git and Go — presumably required to build some Python
# dependencies from source; confirm which ones need them.
RUN apt-get update \
  && apt-get install gcc g++ git golang -y \
  && apt-get clean
WORKDIR /app
# Requirements are added before the sources so the dependency layers below
# are rebuilt only when requirements.txt changes.
ADD tgbot/requirements.txt tgbot/requirements.txt
RUN python3 -m venv venv
RUN venv/bin/pip3 install -r tgbot/requirements.txt
# NOTE(review): `fabrica` and `infra` are listed in the repository's root
# .gitignore, so these COPY steps need them to be present in the build
# context from elsewhere — confirm.
COPY fabrica fabrica
COPY library library
COPY tgbot tgbot
COPY infra/hub/aioclient infra/hub/aioclient
COPY infra/hub/proto infra/hub/proto
# Makes the `library`, `tgbot`, ... directories under /app importable as top-level packages.
ENV PYTHONPATH=/app
RUN mkdir /usr/lib/stc-tgbot
RUN mkdir /var/log/stc-tgbot
# Runs the bot with the interpreter from the virtualenv created above.
CMD ["/app/venv/bin/python3", "tgbot/main.py"]
--------------------------------------------------------------------------------
/web/publi.sh:
--------------------------------------------------------------------------------
1 | npm run build-only
2 |
3 | API_ADDR=($(jq -r '.ipfs_api_multiaddr' summa-config.json))
4 | echo Adding dist...
5 | DIST_CID=$(ipfs --api $API_ADDR add --pin -Q -r --hash=blake3 dist)
6 | echo Settings MFS...
7 | ipfs --api $API_ADDR files rm -r /stc-web
8 | ipfs --api $API_ADDR files cp /ipfs/"$DIST_CID" /stc-web
9 | INDEX_CID=$(jq -r -c '.index' summa-config.json)
10 | ipfs --api $API_ADDR files cp -p /ipfs/$INDEX_CID /stc-web/data
11 | ipfs --api $API_ADDR files cp -p /ipfs/bafybeiaysi4s6lnjev27ln5icwm6tueaw2vdykrtjkwiphwekaywqhcjze/I /stc-web/images/wiki
12 | ipfs --api $API_ADDR files stat --hash /stc-web
13 |
--------------------------------------------------------------------------------
/cybrex/cybrex/data_source/base.py:
--------------------------------------------------------------------------------
1 | from dataclasses import dataclass
2 | from typing import (
3 | List,
4 | Optional,
5 | )
6 |
7 |
@dataclass
class SourceDocument:
    """A document payload paired with its identifier."""

    # Document payload; its schema is not visible in this module.
    document: dict
    # Identifier of the document.
    document_id: str
12 |
13 |
class BaseDataSource:
    """Interface for document sources.

    Both coroutines raise ``NotImplementedError`` here; concrete data sources
    are expected to override them.
    """

    async def stream_documents(self, query: str, limit: int = 0) -> List[SourceDocument]:
        """Return the documents matching ``query`` (``limit`` defaults to 0)."""
        raise NotImplementedError()

    async def search_documents(
        self,
        query: str,
        limit: int = 5,
        sources: Optional[List[str]] = None,
    ) -> List[SourceDocument]:
        """Search for documents matching ``query``, optionally within ``sources``."""
        raise NotImplementedError()
24 |
--------------------------------------------------------------------------------
/web/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | "extends": "@vue/tsconfig/tsconfig.dom.json",
3 | "include": [
4 | "env.d.ts",
5 | "src/**/*.ts",
6 | "src/**/*.vue",
7 | "summa-config.ts",
8 | "vite.config.ts",
9 | "vite-sw.config.ts"
10 | ],
11 | "compilerOptions": {
12 | "baseUrl": ".",
13 | "esModuleInterop": true,
14 | "moduleResolution": "Node",
15 | "paths": {
16 | "@/*": ["./src/*"]
17 | },
18 | "resolveJsonModule": true,
19 | "strict": false,
20 | "types": ["node"]
21 | },
22 |
23 | "references": [
24 | {
25 | "path": "./tsconfig.config.json"
26 | }
27 | ]
28 | }
29 |
--------------------------------------------------------------------------------
/web/README.md:
--------------------------------------------------------------------------------
1 | # Web STC
2 |
3 | Search engine in your browser that can retrieve all data through IPFS. Uncensorable, unblockable, yours.
4 | Original instance of STC lives at http://libstc.cc
5 |
6 | Here you can find its source code and contribute to it if you are a skilled developer.
7 |
8 | ## Development
9 |
10 | It requires [IPFS to be installed](https://docs.ipfs.tech/install/ipfs-desktop/) and launched.
11 |
12 | ```bash
13 | npm i
14 | npm run dev
15 | ```
16 |
17 | ## Publishing
18 |
19 | Publishing here means:
20 | - Building the static site
21 | - Creating an IPFS directory that contains the static site and links to the data batteries
22 |
23 | It can be done with `bash publi.sh`
24 |
--------------------------------------------------------------------------------
/library/sciparse/language_detect.py:
--------------------------------------------------------------------------------
1 | import os.path
2 | from typing import Dict
3 |
4 | import fasttext
5 |
# Prefer the full `lid.176.bin` model; fall back to `lid.176.ftz` when the
# `.bin` file is not present.
path_to_pretrained_model = './library/sciparse/models/lid.176.bin'
if not os.path.exists(path_to_pretrained_model):
    path_to_pretrained_model = './library/sciparse/models/lid.176.ftz'
fmodel = fasttext.load_model(path_to_pretrained_model)
12 |
13 |
def detect_language(text: str, threshold: float = 0.85) -> Dict[str, float]:
    """Predict the language of ``text`` with the module-level fastText model.

    Newlines in ``text`` are replaced with spaces before prediction, and
    ``threshold`` is forwarded to ``fmodel.predict``.

    Returns the last two characters of the first predicted label, or falls
    through and returns ``None`` when no label is predicted.

    NOTE(review): the ``Dict[str, float]`` return annotation does not match
    the string slice returned below - confirm and correct the annotation.
    """
    prediction = fmodel.predict([text.replace('\n', ' ')], threshold=threshold)
    # prediction[0][0] holds the labels predicted for the single input text.
    if prediction[0][0]:
        return prediction[0][0][0][-2:]
18 |
--------------------------------------------------------------------------------
/web/src/components/LoadingSpinner.vue:
--------------------------------------------------------------------------------
1 |
2 | div(v-if="is_launched")
3 | .d-flex.justify-content-center
4 | .spinner-border(role="status")
5 | .d-flex.justify-content-center.m-5(v-if="label")
6 | div {{ label }}
7 |
8 |
9 |
32 |
--------------------------------------------------------------------------------
/geck/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools<65.0"]
3 | build-backend = "setuptools.build_meta"
4 |
5 | [project]
6 | name = "stc-geck"
7 | version = "1.8.38"
8 | authors = [{ name = "Interdimensional Walker" }]
9 | description = "GECK (Garden Of Eden Creation Kit) is a toolkit for setting up and maintaining STC"
10 | readme = "README.md"
11 | requires-python = ">=3.8"
12 | classifiers = [
13 | "Programming Language :: Python :: 3.8",
14 | ]
15 | dynamic = ["dependencies"]
16 |
17 | [project.scripts]
18 | geck = "stc_geck.cli:main"
19 |
20 | [project.urls]
21 | "Homepage" = "https://github.com/nexus-stc/stc"
22 |
23 | [tool.setuptools.dynamic]
24 | dependencies = {file = ["requirements.txt"]}
25 |
26 |
--------------------------------------------------------------------------------
/cybrex/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools<65.0"]
3 | build-backend = "setuptools.build_meta"
4 |
5 | [project]
6 | name = "cybrex"
7 | version = "1.12.10"
8 | authors = [{ name = "Interdimensional Walker" }]
9 | description = "Researching AI"
10 | readme = "README.md"
11 | requires-python = ">=3.8"
12 | classifiers = [
13 | "Programming Language :: Python :: 3.8",
14 | ]
15 | dynamic = ["dependencies"]
16 |
17 | [project.scripts]
18 | cybrex = "cybrex.cli:main"
19 |
20 | [project.urls]
21 | "Homepage" = "https://github.com/nexus-stc/stc"
22 |
23 | [project.optional-dependencies]
24 | petals = ["petals>=2.0.0"]
25 |
26 | [tool.setuptools.dynamic]
27 | dependencies = {file = ["requirements.txt"]}
28 |
29 |
--------------------------------------------------------------------------------
/cybrex/cybrex/utils.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 |
3 |
class MultipleAsyncExecution:
    """Run coroutines as tasks with at most ``par`` of them in flight.

    ``execute`` waits for a free slot, schedules the coroutine as a task and
    returns that task. ``join`` waits until every slot is free again and then
    marks the executor as joined; later ``execute`` calls raise ``RuntimeError``.
    """

    def __init__(self, par):
        self.par = par
        self.s = asyncio.Semaphore(par)

    async def execute(self, coro):
        """Schedule ``coro`` once a slot is free and return the created task.

        Raises:
            RuntimeError: if the executor has already been joined.
        """
        if self.s is None:
            raise RuntimeError('`MultipleAsyncExecution` has been already joined')
        # Keep a local reference: `join()` may reset `self.s` to None while
        # this call is still waiting, and the done-callback must release the
        # very semaphore it acquired.
        semaphore = self.s
        await semaphore.acquire()
        try:
            task = asyncio.create_task(coro)
        except BaseException:
            # Scheduling failed (e.g. `coro` is not a coroutine): give the
            # slot back so it is not lost forever.
            semaphore.release()
            raise
        task.add_done_callback(lambda _: semaphore.release())
        return task

    async def join(self):
        """Wait until all slots are free, then mark the executor as joined."""
        # Holding every permit means no scheduled task is still running.
        for _ in range(self.par):
            await self.s.acquire()
        semaphore = self.s
        self.s = None
        for _ in range(self.par):
            semaphore.release()
24 |
--------------------------------------------------------------------------------
/tgbot/handlers/aboutus.py:
--------------------------------------------------------------------------------
1 | from telethon import (
2 | Button,
3 | events,
4 | )
5 |
6 | from library.telegram.base import RequestContext
7 | from tgbot.translations import t
8 |
9 | from .base import BaseHandler
10 |
11 |
class AboutusHandler(BaseHandler):
    """Replies to ``/aboutus`` with the translated ``ABOUT_US`` text."""

    filter = events.NewMessage(incoming=True, pattern='^/aboutus(@[A-Za-z0-9_]+)?$')
    is_group_handler = True

    async def handler(self, event: events.ChatAction, request_context: RequestContext):
        """Log the request and reply in the chat's language without a link preview."""
        request_context.statbox(action='show', mode='aboutus')
        about_text = t('ABOUT_US', request_context.chat['language'])
        await event.reply(about_text, buttons=Button.clear(), link_preview=False)
23 |
--------------------------------------------------------------------------------
/tgbot/handlers/librarian.py:
--------------------------------------------------------------------------------
1 | import re
2 |
3 | from telethon import events
4 |
5 | from library.telegram.base import RequestContext
6 |
7 | from .base import BaseHandler
8 |
9 |
class LibrarianTextHandler(BaseHandler):
    """Deletes incoming messages whose sender is not a configured librarian moderator."""

    filter = events.NewMessage(incoming=True, pattern=re.compile(r'(.*)', flags=re.DOTALL))
    is_group_handler = True

    async def handler(self, event: events.ChatAction, request_context: RequestContext):
        """Tag the request with a fresh session id, then delete non-moderator messages."""
        request_context.add_default_fields(
            mode='librarian_text',
            session_id=self.generate_session_id(),
        )
        sender_id = event.sender_id
        moderators = self.application.config['librarian']['moderators']
        if sender_id in moderators:
            return
        await event.delete()
21 |
--------------------------------------------------------------------------------
/tgbot/main.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | import logging
3 | from concurrent.futures import ThreadPoolExecutor
4 |
5 | from app.application import TelegramApplication
6 | from configs import get_config
7 | from izihawa_loglib import configure_logging
8 |
9 |
def main(config):
    """Configure logging, run the Telegram application until it stops, then log the exit.

    A dedicated event loop with a 64-thread default executor is created and
    installed as the current loop for the lifetime of the application.
    """
    configure_logging(config)
    loop = asyncio.new_event_loop()
    loop.set_default_executor(ThreadPoolExecutor(64))
    asyncio.set_event_loop(loop)
    try:
        loop.run_until_complete(TelegramApplication(config=config).start_and_wait())
    finally:
        # `asyncio.get_running_loop()` raises RuntimeError once
        # `run_until_complete` has returned, so the loop object created above
        # is closed directly instead.
        loop.close()
    logging.getLogger('statbox').info({
        'mode': 'application',
        'action': 'exit',
    })


if __name__ == '__main__':
    main(config=get_config())
25 |
--------------------------------------------------------------------------------
/web/src/components/QrCode.vue:
--------------------------------------------------------------------------------
1 |
2 | div.favicon-inversion-filter.text-center(id="qr-code")
3 |
4 |
5 |
32 |
--------------------------------------------------------------------------------
/tgbot/handlers/help.py:
--------------------------------------------------------------------------------
1 | from telethon import (
2 | Button,
3 | events,
4 | )
5 |
6 | from library.telegram.base import RequestContext
7 | from tgbot.translations import t
8 |
9 | from .base import BaseHandler
10 |
11 |
class HelpHandler(BaseHandler):
    """Replies to ``/help`` with the help text for the kind of chat it came from."""

    filter = events.NewMessage(incoming=True, pattern='^/help(@[A-Za-z0-9_]+)?$')
    is_group_handler = True

    async def handler(self, event: events.ChatAction, request_context: RequestContext):
        """Send ``HELP`` in private chats and ``HELP_FOR_GROUPS`` when addressed in a group."""
        request_context.statbox(action='show', mode='help')

        if not (event.is_group or event.is_channel):
            await event.reply(t('HELP', request_context.chat['language']), buttons=Button.clear())
            return

        # In groups and channels, answer only when the command names this bot.
        addressed_to = event.pattern_match.group(1)
        if addressed_to == f'@{request_context.bot_name}':
            await event.reply(t('HELP_FOR_GROUPS', request_context.chat['language']), buttons=Button.clear())
24 |
--------------------------------------------------------------------------------
/tgbot/handlers/howtohelp.py:
--------------------------------------------------------------------------------
1 | from telethon import events
2 |
3 | from library.telegram.base import RequestContext
4 | from tgbot.configs import config
5 | from tgbot.translations import t
6 |
7 | from .base import BaseHandler
8 |
9 |
class HowToHelpHandler(BaseHandler):
    """Replies to ``/howtohelp`` with the translated ``HOW_TO_HELP`` text."""

    filter = events.NewMessage(incoming=True, pattern='^/howtohelp(@[A-Za-z0-9_]+)?$')
    is_group_handler = True

    async def handler(self, event: events.ChatAction, request_context: RequestContext):
        """Fill the template with configured links; a missing link renders as the fallback emoji."""
        request_context.statbox(action='show', mode='howtohelp')
        template = t('HOW_TO_HELP', request_context.chat['language'])
        placeholders = {
            'reddit_url': config['reddit'].get('url', '🚫'),
            'related_channel': config['telegram'].get('related_channel', '🚫'),
            'twitter_contact_url': config['twitter'].get('contact_url', '🚫'),
        }
        await event.reply(template.format(**placeholders))
22 |
--------------------------------------------------------------------------------
/library/sciparse/cli.py:
--------------------------------------------------------------------------------
1 | import fire
2 | from aiobaseclient import BaseClient
3 | from aiokit.utils import sync_fu
4 | from izihawa_ipfs_api import IpfsHttpClient
5 |
6 | from library.sciparse.sciparser import (
7 | ClientPool,
8 | SciParser,
9 | )
10 |
11 |
async def process(grobid_base_url, ipfs_base_url, doi):
    """Parse the paper identified by ``doi`` and print the result.

    Starts an IPFS HTTP client and a GROBID client (wrapped in a pool of 16),
    builds a ``SciParser`` on top of them and prints what ``parse_paper`` returns.
    """
    ipfs = IpfsHttpClient(base_url=ipfs_base_url)
    await ipfs.start()
    grobid = BaseClient(base_url=grobid_base_url)
    await grobid.start()

    grobid_pool = ClientPool.from_client(grobid, par=16)
    parser = SciParser(ipfs_http_client=ipfs, grobid_pool=grobid_pool)
    await parser.start()
    print(await parser.parse_paper(doi))
25 |
26 |
def main():
    """Expose ``process`` as a command-line entry point through ``fire``."""
    # `sync_fu` wraps the coroutine function before it is handed to `fire`
    # (presumably turning it into a blocking callable - confirm in aiokit).
    fire.Fire(sync_fu(process))


if __name__ == '__main__':
    main()
33 |
--------------------------------------------------------------------------------
/tgbot/handlers/shortlink.py:
--------------------------------------------------------------------------------
1 | from telethon import events
2 |
3 | from library.telegram.base import RequestContext
4 | from tgbot.translations import t
5 | from tgbot.views.telegram.common import (
6 | TooLongQueryError,
7 | encode_query_to_deep_link,
8 | )
9 |
10 | from .base import BaseHandler
11 |
12 |
class ShortlinkHandler(BaseHandler):
    """Replies to ``/shortlink <query>`` with a deep link that encodes the query."""

    filter = events.NewMessage(incoming=True, pattern='^/shortlink\\s?(.*)?')

    async def handler(self, event: events.ChatAction, request_context: RequestContext):
        """Encode the query as a deep link, or reply with the "too long" message instead."""
        query = event.pattern_match.group(1)
        request_context.statbox(action='start', mode='shortlink', query=query)

        try:
            reply_text = encode_query_to_deep_link(query, request_context.bot_name)
        except TooLongQueryError:
            reply_text = t('TOO_LONG_QUERY_FOR_SHORTLINK', request_context.chat['language'])

        # Either text is sent wrapped in backticks.
        reply = await event.reply(f'`{reply_text}`', link_preview=False)
        return reply
26 |
--------------------------------------------------------------------------------
/tgbot/app/exceptions.py:
--------------------------------------------------------------------------------
1 | import logging
2 |
3 | from izihawa_utils.exceptions import BaseError
4 |
5 |
class BannedUserError(BaseError):
    """Error for a banned user; carries the ban timeout."""
    level = logging.WARNING
    code = 'banned_user_error'

    def __init__(self, ban_timeout: int):
        # NOTE(review): `BaseError.__init__` is not invoked here - confirm that is intended.
        self.ban_timeout = ban_timeout
12 |
13 |
class UnknownFileFormatError(BaseError):
    """Error for a file whose format is not recognised; carries WARNING level."""
    level = logging.WARNING
    code = 'unknown_file_format_error'
17 |
18 |
class UnknownIndexAliasError(BaseError):
    """Error for an unknown index alias; `level` is left at the `BaseError` default."""
    code = 'unknown_index_alias_error'
21 |
22 |
class WidgetError(BaseError):
    """Widget failure that carries the text and buttons supplied by the raiser."""
    level = logging.WARNING
    code = 'widget_error'

    def __init__(self, text, buttons):
        # NOTE(review): `BaseError.__init__` is not invoked here - confirm that is intended.
        self.text = text
        self.buttons = buttons
30 |
31 |
class DownloadError(BaseError):
    """Error for a failed download; carries WARNING level."""
    level = logging.WARNING
    code = 'download_error'
35 |
36 |
class InvalidSearchError(BaseError):
    """Error for an invalid search; keeps the offending search on the instance."""

    def __init__(self, search):
        # NOTE(review): no `code`/`level` is set and `BaseError.__init__` is not
        # invoked, unlike most sibling errors - confirm that is intended.
        self.search = search
40 |
--------------------------------------------------------------------------------
/web/src/components/ReferencesList.vue:
--------------------------------------------------------------------------------
1 |
2 | div
3 | loading-spinner(v-if="references === null")
4 | div(v-else)
5 | div(v-for="(reference, i) of references")
6 | document-snippet.small(
7 | :document="JSON.parse(reference.document)",
8 | v-bind:key="reference.position",
9 | :with_abstract="false",
10 | :with_cover="false",
11 | :with_tags="false",
12 | )
13 | hr(v-if="i !== references.length - 1")
14 |
15 |
16 |
32 |
--------------------------------------------------------------------------------
/web/src/components/SearchList.vue:
--------------------------------------------------------------------------------
1 |
2 | div(v-for="scored_document in scored_documents" v-bind:key="scored_document.position")
3 | .card.mb-3
4 | .card-body
5 | document-snippet(
6 | :document="JSON.parse(scored_document.document)"
7 | :snippets="scored_document.snippets",
8 | :with_abstract="true"
9 | :with_cover="true"
10 | :with_large_caption="true"
11 | )
12 |
13 |
14 |
30 |
31 |
40 |
--------------------------------------------------------------------------------
/web/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 | STC
16 |
17 |
18 |
19 |
20 |
21 |
22 |
--------------------------------------------------------------------------------
/web/src/main.ts:
--------------------------------------------------------------------------------
1 | import './scss/styles.scss'
2 | import 'bootstrap'
3 | import 'bootstrap/js/dist/tab'
4 |
5 | import { createApp } from 'vue'
6 |
7 | import App from './App.vue'
8 | import router from './router'
9 | import { get_label } from './translations'
10 | import {SearchService} from "@/services/search/search-service";
11 |
// Apply the user's preferred color scheme ("dark" or "light") to the <html>
// element through the `data-bs-theme` attribute.
function updateTheme () {
  const prefers_dark = window.matchMedia('(prefers-color-scheme: dark)').matches
  let color_mode = 'light'
  if (prefers_dark) {
    color_mode = 'dark'
  }
  document.querySelector('html').setAttribute('data-bs-theme', color_mode)
}
19 |
// Set theme on load
updateTheme()

// Update theme when the preferred scheme changes
window
  .matchMedia('(prefers-color-scheme: dark)')
  .addEventListener('change', updateTheme)

// Create the root application and install the router.
const app = createApp(App)
app.use(router)

// Make `get_label` available as a method on every component via a global mixin.
app.mixin({
  methods: {
    get_label
  }
})
// Share one SearchService instance with all components as `search_service`.
app.config.globalProperties.search_service = new SearchService("info")
app.mount('#app')
38 |
--------------------------------------------------------------------------------
/cybrex/cybrex/llm_manager.py:
--------------------------------------------------------------------------------
1 | import logging
2 |
3 |
class LLMManager:
    """Bundles an LLM with its prompter, config and optional tokenizer."""

    def __init__(self, llm, prompter, config, max_prompt_chars, tokenizer=None):
        self.llm = llm
        self.prompter = prompter
        self.config = config
        self.max_prompt_chars = max_prompt_chars
        self.tokenizer = tokenizer

    @property
    def context_length(self):
        """The ``context_length`` entry of the config."""
        return self.config['context_length']

    def process(self, prompt):
        """Log ``prompt`` and return the model output for it.

        Without a tokenizer the LLM is called directly with the prompt.
        With one, the prompt is tokenized, ``llm.generate`` is run and only
        the tokens after the prompt are decoded.
        """
        logging.getLogger('statbox').info({'action': 'process', 'mode': 'llm_manager', 'prompt': prompt})
        if not self.tokenizer:
            return self.llm(prompt)

        input_ids = self.tokenizer(prompt, return_tensors="pt")["input_ids"]
        generated = self.llm.generate(
            input_ids,
            max_new_tokens=self.config.get('max_new_tokens'),
            temperature=self.config.get('temperature', 1.0),
        )
        # Drop the prompt tokens that precede the newly generated ones.
        prompt_length = input_ids.shape[1]
        decoded = self.tokenizer.batch_decode(generated[:, prompt_length:])
        # NOTE(review): replacing '' with '' is a no-op as written; a marker
        # string may have been lost here - confirm against the repository.
        return decoded[0].replace('', '')
28 |
--------------------------------------------------------------------------------
/web/src/components/ConnectivityIssues.vue:
--------------------------------------------------------------------------------
1 |
2 | div.font-monospace
3 | p {{ text }}
4 |
5 |
6 |
42 |
--------------------------------------------------------------------------------
/web/.eslintrc.js:
--------------------------------------------------------------------------------
1 | module.exports = {
2 | "env": {
3 | "browser": true,
4 | "es2021": true
5 | },
6 | "extends": [
7 | "standard-with-typescript",
8 | "plugin:vue/vue3-recommended",
9 | "@vue/typescript/recommended",
10 | ],
11 | "overrides": [
12 | {
13 | "env": {
14 | "node": true
15 | },
16 | "files": [
17 | ".eslintrc.{js,cjs}"
18 | ],
19 | "parserOptions": {
20 | "sourceType": "script"
21 | }
22 | }
23 | ],
24 | "parser": "vue-eslint-parser",
25 | "parserOptions": {
26 | "ecmaVersion": "latest",
27 | "project": "./tsconfig.json",
28 | "sourceType": "module"
29 | },
30 | "plugins": [
31 | "@typescript-eslint",
32 | "vue",
33 | "simple-import-sort"
34 | ],
35 | "root": true,
36 | "rules": {
37 | "simple-import-sort/imports": "error",
38 | "simple-import-sort/exports": "error",
39 | "@typescript-eslint/naming-convention": [
40 | "error",
41 | {
42 | "selector": "variableLike", "format": ["snake_case", "camelCase"]
43 | }
44 | ]
45 | }
46 | }
47 |
--------------------------------------------------------------------------------
/tgbot/README.md:
--------------------------------------------------------------------------------
1 | ### Launching bots
2 |
3 | - The first startup will be slow!
4 | - Make sure to mount volumes for persistence. Otherwise, after every restart, you will lose your caches and databases (including users and riot bots).
5 | - Beforehand, you need to set up all credentials in the `.env.light` file. After setting them up, execute the following command in the Terminal:
6 |
7 | ```bash
8 | docker compose --env-file .env.light up --force-recreate --build
9 | ```
10 | Wait for the following line to be displayed in the logs:
11 | ```bash
12 | light-tgbot-1 | INFO:statbox:{'action': 'started', 'mode': 'dynamic_bot', 'bot_name': ''}
13 | ```
14 |
15 | Possible performance optimizations, from least to most complicated:
16 |
17 | - Mount volumes into the tgbot service to keep cached bot credentials and logs between restarts:
18 | ```yaml
19 | volumes:
20 | - /usr/lib/stc-tgbot:/usr/lib/stc-tgbot
21 | - /var/log/stc-tgbot:/var/log/stc-tgbot
22 | ```
23 | - Mount a volume into the ipfs service to cache the database and downloaded items:
24 | ```yaml
25 | volumes:
26 | - /data/ipfs:/data/ipfs
27 | ```
28 | - If you have mounted volumes to ipfs, pin the database to IPFS:
29 | ```bash
30 | docker compose --env-file .env.light exec ipfs ipfs pin add /ipns/libstc.cc --progress
31 | ```
32 | - Host the database directly (requires development experience).
--------------------------------------------------------------------------------
/web/src/views/IntroView.vue:
--------------------------------------------------------------------------------
1 |
2 | .container
3 | h2 Welcome, seeker!
4 | p 欢迎,探索者!- आपका स्वागत है, साधक! - ¡Bienvenido buscador! - Bem-vindo, buscador! - مرحبًا بك أيها الباحث! - Добро пожаловать, искатель!
5 | ul
6 | li
7 | a(href="#/nexus_science/cid:bafykbzaceayxkpz5tk6nuqjzoidix4y4lakfwvzkmfisycduijcwfew7waa5e") English through pictures
8 | li
9 | a(href="#/?q=english+for+beginners&p=1&d=nexus_science") Further sources for learning English
10 | h4 {{ get_label('about') }}
11 | span(v-html="get_label('about_intro')")
12 | hr
13 | img.img-fluid(id="origin", src="@/assets/origin.jpg")
14 |
15 |
31 |
32 |
40 |
--------------------------------------------------------------------------------
/library/textutils/__init__.py:
--------------------------------------------------------------------------------
1 | import re
2 |
# Runs of characters that are neither whitespace nor word characters.
NON_ALNUMWHITESPACE_REGEX = re.compile(r'([^\s\w])+')
# E-mail address; groups: local part, domain, top-level domain (2-5 letters).
EMAIL_REGEX = re.compile(r'([a-zA-Z0-9_\-\.]+)@([a-zA-Z0-9_\-\.]+)\.([a-zA-Z]{2,5})')
# One or more '#'/'@' characters followed by a name of letters, digits and underscores.
HASHTAG_REGEX = re.compile(r'([#@]+)([A-Za-z0-9_]+)')
# Runs of whitespace.
MULTIWHITESPACE_REGEX = re.compile(r"\s+")
# A string that consists of exactly one character from the code-point ranges below.
STICKER_REGEX = re.compile(
    '^[\U0001F1E0-\U0001F1FF'
    '\U0001F300-\U0001F5FF'
    '\U0001F600-\U0001F64F'
    '\U0001F680-\U0001F6FF'
    '\U0001F700-\U0001F77F'
    '\U0001F780-\U0001F7FF'
    '\U0001F800-\U0001F8FF'
    '\U0001F900-\U0001F9FF'
    '\U0001FA00-\U0001FA6F'
    '\U0001FA70-\U0001FAFF'
    '\U00002702-\U000027B0]$',
    flags=re.UNICODE,
)
# URL pattern: the scheme name is optional, but the '://' separator is required.
URL_REGEX_TEXT = r'(https?|ftp)?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)'
URL_REGEX = re.compile(URL_REGEX_TEXT)
23 | HIDDEN_CHAR = ''
# t.me link with an optional scheme; 'joinchat/' links are excluded and the
# name after 't.me/' is captured.
TELEGRAM_LINK_REGEX = re.compile('(?:https?://)?t\\.me/(?!joinchat/)([A-Za-z0-9_]+)')

# NOTE(review): in the DOI patterns below the dot in `10.` is unescaped, so it
# matches any character, not only a literal '.' - confirm whether that is intended.
# DOI prefix followed by anything that ends in a literal '.*'.
DOI_WILDCARD_REGEX_TEXT = r'(10.\d{4,9}).*\.\*'
# DOI with an optional 'doi.org/' lead-in; prefix and suffix may be separated
# by '/' or its percent-encoded form.
DOI_REGEX_TEXT = r'(?:doi.org/)?(10.\d{4,9})\s?(?:/|%2[Ff])\s?([%-._;()<>/:A-Za-z0-9]+[^.?\s])'
DOI_REGEX = re.compile(DOI_REGEX_TEXT)
# Whole-string ISBN with an optional case-insensitive 'ISBN' lead-in and an
# optional 978/979 prefix.
ISBN_REGEX = re.compile(r'^(?:[iI][sS][bB][nN]\:?\s*)?((97(8|9))?\-?\d{9}(\d|X))$')
# 32 consecutive hexadecimal characters.
MD5_REGEX = re.compile(r'([A-Fa-f0-9]{32})')
# A string that is nothing but a DOI (anchored at both ends).
ONLY_DOI_REGEX = re.compile(r'^(10.\d{4,9})\s?/\s?([-._;()<>/:A-Za-z0-9]+[^.?\s])$')
# Numeric id taken from an ncbi.nlm.nih.gov/pubmed/ URL or a 'PMID' lead-in.
PUBMED_ID_REGEX = re.compile(r'(?:(?:https?://)?(?:www.)?ncbi.nlm.nih.gov/pubmed/|[Pp][Mm][Ii][Dd]\s?:?\s*)([0-9]+)')
# A single character in the range U+4E00..U+9FFF.
CJK_CHAR_REGEX_TEXT = r'[\u4e00-\u9fff]'
34 |
--------------------------------------------------------------------------------
/library/telegram/promotioner.py:
--------------------------------------------------------------------------------
1 | import random
2 |
3 |
class Promotioner:
    """
    Picks one promotion at random, each with a chance proportional to its weight,
    and renders its text for the requested language.
    """
    def __init__(
        self,
        promotions: list,
        default_promotion_index: int = 0,
        promotion_vars: dict = None,
    ):
        self.promotions = promotions
        self.default_promotion_index = default_promotion_index
        self.promotion_vars = promotion_vars or {}
        # Running totals of the weights, in promotion order.
        running_total = self.promotions[0]['weight']
        self.partial_sums: list = [running_total]
        for promotion in self.promotions[1:]:
            running_total = promotion['weight'] + running_total
            self.partial_sums.append(running_total)

    def choose_promotion(self, language: str = 'en') -> str:
        """Return the formatted text of a randomly chosen promotion.

        When the chosen promotion has no text for ``language``, a promotion
        flagged ``local`` is replaced by the default promotion; the text then
        falls back to ``'en'`` if ``language`` is still unavailable.
        """
        pivot = random.randrange(self.partial_sums[-1])
        for partial_sum, promotion in zip(self.partial_sums, self.promotions):
            if partial_sum > pivot:
                texts = promotion['texts']
                if language not in texts and promotion.get('local', False):
                    texts = self.promotions[self.default_promotion_index]['texts']
                chosen_language = language if language in texts else 'en'
                return texts[chosen_language].format(**self.promotion_vars)
37 |
--------------------------------------------------------------------------------
/library/telegram/utils.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import traceback
3 | from contextlib import asynccontextmanager
4 | from typing import Optional
5 |
6 | from izihawa_loglib import error_log
7 | from telethon import (
8 | errors,
9 | events,
10 | )
11 |
12 |
@asynccontextmanager
async def safe_execution(
    error_log=error_log,
    on_fail: Optional = None,
    level=logging.WARNING,
    is_logging_enabled: bool = True
):
    """Async context manager that swallows and logs most errors raised in its body.

    Args:
        error_log: callable used to report a caught exception.
        on_fail: optional zero-argument coroutine function awaited after an
            unexpected exception was caught.
        level: logging level passed to ``error_log``.
        is_logging_enabled: when False, caught exceptions are not reported.

    Raises:
        events.StopPropagation: always re-raised.
        ValueError: re-raised unless its first argument starts with
            ``'Request was unsuccessful'``.
    """
    try:
        try:
            yield
        except events.StopPropagation:
            raise
        except errors.MessageNotModifiedError:
            # Ignored silently.
            pass
        except (
            errors.UserIsBlockedError,
            errors.QueryIdInvalidError,
            errors.MessageDeleteForbiddenError,
            errors.MessageIdInvalidError,
            errors.ChatAdminRequiredError,
        ) as e:
            # These Telegram errors are reported but never trigger `on_fail`.
            if is_logging_enabled:
                error_log(e, level=level)
                traceback.print_exc()
        except ValueError as e:
            if e.args and e.args[0].startswith('Request was unsuccessful'):
                if is_logging_enabled:
                    error_log(e, level=level)
            else:
                raise
        except Exception as e:
            if is_logging_enabled:
                error_log(e, level=level)
                traceback.print_exc()
            if on_fail:
                await on_fail()
    except events.StopPropagation:
        raise
    except Exception as e:
        # Catches whatever escaped the inner handlers, including a re-raised
        # ValueError or a failure inside `on_fail`.
        if is_logging_enabled:
            error_log(e, level=level)
54 |
--------------------------------------------------------------------------------
/web/src/views/StcHubApiView.vue:
--------------------------------------------------------------------------------
1 |
2 | .container
3 | h3 STC Hub API
4 | p The STC Hub API offers a straightforward way to integrate third-party applications with the STC's extensive corpus of scholarly publications.
5 | h5 Introduction
6 | p IPFS allows the creation of large, distributable directories across multiple peers. Each directory is assigned a unique CID that can be used as a reference.
7 | p We have utilized this feature to compile a directory of numerous scholarly papers, each labeled with its DOI. The entire directory can be accessed via the following alias: /ipns/hub.standard-template-construct.org
8 | p Users have the ability to pin this directory, locate a paper by its DOI, or directly retrieve papers from the directory.
9 | h5 Usage
10 | h6 Retrieving Files Through IPFS
11 | p All DOIs are urlencoded to accommodate special characters in the name. Below is an example of how to retrieve a paper using the Kubo CLI:
12 | pre
13 | code ipfs get /ipns/hub.standard-template-construct.org/10.1145%2F15922.15895.pdf
14 | h6 Retrieving Files Through HTTP API
15 | p The names are urlencoded twice due to the HTTP server decoding urlencoded URLs independently before passing them to the IPFS network:
16 | pre
17 | code
18 | | export IPNS_NAME=/ipns/hub.standard-template-construct.org
19 | | export GATEWAY_URL=http://localhost:8080
20 | | curl -L $GATEWAY_URL/$IPNS_NAME/10.1145%252F15922.15895.pdf
21 |
22 |
31 |
--------------------------------------------------------------------------------
/tgbot/handlers/close.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | import time
3 |
4 | from telethon import events
5 |
6 | from library.telegram.base import RequestContext
7 | from library.telegram.utils import safe_execution
8 | from tgbot.translations import t
9 |
10 | from .base import BaseCallbackQueryHandler
11 |
12 |
def is_earlier_than_2_days(message):
    """Tell whether ``message`` was sent less than 48 hours (minus 10 seconds) ago.

    The age is computed with ``datetime.timestamp()``, which honours the
    datetime's own time zone. The previous ``time.mktime(date.timetuple())``
    read the fields of a timezone-aware date as local time, so the result was
    shifted by the host's UTC offset.

    Args:
        message: object with a ``date`` attribute (a ``datetime`` or a falsy value).

    Returns:
        True when the message is recent enough, False otherwise, including
        when the message has no date.
    """
    if not message.date:
        return False
    age_seconds = time.time() - message.date.timestamp()
    return age_seconds < 48 * 60 * 60 - 10
16 |
17 |
class CloseHandler(BaseCallbackQueryHandler):
    """Handles the ``/close`` callback by deleting the message and the message it replied to."""

    filter = events.CallbackQuery(pattern='^/close(?:_([A-Za-z0-9]+))?(?:_([0-9]+))?$')

    async def handler(self, event, request_context: RequestContext):
        """Delete the message (and its replied-to message) when both pass the age check.

        When the message is missing or fails ``is_earlier_than_2_days``, the
        callback is answered with the ``DELETION_FORBIDDEN_DUE_TO_AGE`` text instead.
        """
        session_id = event.pattern_match.group(1)
        if session_id:
            # The matched group is decoded to text before it is logged.
            session_id = session_id.decode()
        request_context.add_default_fields(mode='close')

        # Awaitables collected here are run together at the end.
        target_events = []
        message = await event.get_message()

        if message and is_earlier_than_2_days(message):
            target_events.append(event.answer())
            request_context.statbox(
                action='close',
                message_id=message.id,
                session_id=session_id,
            )
            reply_message = await message.get_reply_message()
            if reply_message and is_earlier_than_2_days(reply_message):
                target_events.append(reply_message.delete())
            target_events.append(message.delete())
        else:
            # NOTE(review): `t` is called without a language here, unlike the
            # other handlers - confirm that its default is intended.
            async with safe_execution(is_logging_enabled=False):
                await event.answer(t('DELETION_FORBIDDEN_DUE_TO_AGE'))
        await asyncio.gather(*target_events)
45 |
--------------------------------------------------------------------------------
/tgbot/handlers/report.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 |
3 | from stc_geck.advices import BaseDocumentHolder
4 | from telethon import events
5 |
6 | from library.telegram.base import RequestContext
7 | from library.telegram.utils import safe_execution
8 |
9 | from .base import BaseCallbackQueryHandler
10 |
11 |
class ReportHandler(BaseCallbackQueryHandler):
    """Records a broken-file report sent as ``/r_<cid> [reason]`` and thanks the reporter."""

    filter = events.NewMessage(incoming=True, pattern=r'^(?:@\w+)?\s+\/r_([A-Za-z0-9_-]+)(?:\s+(.*))?$')

    def parse_pattern(self, event: events.ChatAction):
        """Return the ``(cid, reason)`` pair captured by the filter pattern."""
        match = event.pattern_match
        return match.group(1), match.group(2)

    async def handler(self, event: events.ChatAction, request_context: RequestContext):
        """Store the vote for the reported document, then reply and delete the report message."""
        cid, reason = self.parse_pattern(event)

        request_context.add_default_fields(mode='report', cid=cid)
        request_context.statbox(action='report')

        summa_client = self.application.summa_client
        document = await summa_client.get_one_by_field_value('nexus_science', 'cid', cid)
        document_holder = BaseDocumentHolder(document)

        vote = {
            'bot_name': self.bot_config['bot_name'],
            'user_id': request_context.chat['chat_id'],
            'internal_id': document_holder.get_internal_id(),
            'cid': cid,
            'reason': reason,
        }
        await self.application.database.add_vote_broken_file(**vote)

        async with safe_execution():
            thanks = (
                f'Thank you for reporting `{document_holder.get_internal_id()}`. '
                f'Be careful, too many misreports will cause a ban'
            )
            return await asyncio.gather(event.reply(thanks), event.delete())
43 |
--------------------------------------------------------------------------------
/web/public/favicon-black.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/library/textutils/html_processing.py:
--------------------------------------------------------------------------------
1 | import re
2 |
3 | from library.textutils.utils import despace
4 |
5 |
6 | def reduce_br(soup_str):
7 | soup_str = soup_str.replace("
", "
").replace('
', '
').replace('
', '')
8 | soup_str = re.sub(r'([^.>])
([^(
)])', r'\g<1> \g<2>', soup_str)
9 | soup_str = re.sub(r'(?:
\s*)+([^(
)])', r'
\g<1>', soup_str)
10 | soup_str = despace(soup_str)
11 | return soup_str
12 |
13 |
def remove_chars(soup_str):
    """Strip byte-order marks and convert CRLF line endings to LF."""
    without_bom = soup_str.replace('\ufeff', '')
    return without_bom.replace('\r\n', '\n')
17 |
18 |
def process_tags(soup):
    """Normalise tag names in place and strip every attribute except `href` and `class`.

    `span` elements are unwrapped, a fixed set of tags is renamed, and `p`
    elements carrying the `ref` class become `ref`. Returns the same `soup`.
    """
    renamed_tags = {
        'em': 'i',
        'italic': 'i',
        'strong': 'b',
        'sec': 'section',
        'disp-formula': 'formula',
    }
    preserved_attributes = ('href', 'class')

    for element in soup.find_all():
        tag = element.name
        if tag == 'span':
            element.unwrap()
        elif tag in renamed_tags:
            element.name = renamed_tags[tag]
        elif tag == 'p' and 'ref' in element.attrs.get('class', []):
            element.name = 'ref'
        # Attribute filtering applies to every element, renamed or not.
        element.attrs = {
            key: element.attrs[key]
            for key in preserved_attributes
            if key in element.attrs
        }
    return soup
42 |
43 |
def headerize_headers(soup):
    """Turn paragraphs whose only child is a `b` element into `header` elements.

    The `b` child is renamed to `header` and replaces its parent paragraph.
    Returns the same `soup`.
    """
    for paragraph in soup.find_all():
        if paragraph.name != 'p':
            continue
        children = list(paragraph.children)
        if len(children) != 1:
            continue
        only_child = children[0]
        if only_child.name == 'b':
            only_child.name = 'header'
            paragraph.replace_with(only_child)
    return soup
53 |
--------------------------------------------------------------------------------
/web/public/favicon.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/web/src/App.vue:
--------------------------------------------------------------------------------
1 |
2 | div.d-flex.flex-column.min-vh-100.w-100
3 | header
4 | nav.navbar.navbar-expand
5 | .container-fluid
6 | router-link.ms-2.navbar-brand(to="/")
7 | img.favicon-inversion-filter(src="/favicon-black.svg" alt="" width="36" height="36")
8 | .navbar-nav.me-auto
9 | .navbar-nav
10 | router-link.nav-link(to="/bookmarks" data-bs-toggle="tooltip" data-bs-placement="top" title="Bookmarks")
11 | div.text-center
12 | i.bi.bi-bookmark
13 | router-link.nav-link(to="/help" data-bs-toggle="tooltip" data-bs-placement="top" title="Help")
14 | div.text-center
15 | i.bi.bi-question-circle-fill
16 | .mb-3
17 | router-view
18 | footer.footer.mt-auto.text-end.small.mb-3
19 | .container.small
20 | div {{ get_label ("stamp") }}
21 | a(href="https://github.com/nexus-stc/stc/issues/new?assignees=&labels=&projects=&template=bug_report.md&title=") {{ get_label("report_a_bug") }}
22 | span |
23 | a(href="/#/help") {{ get_label("help") }}
24 |
25 |
26 |
39 |
59 |
--------------------------------------------------------------------------------
/web/src/views/StcBoxView.vue:
--------------------------------------------------------------------------------
1 |
2 | .container
3 | h3 {{ get_label("stc_box") }}
4 | p You can set up STC on a small computer to act as a personal or group library.
5 | h5 Minimum Requirements
6 | p For testing, we used an Orange PI 5 with 16GB of RAM, a Sandisk 128GB MicroSD, and a 16TB Seagate Exos HDD attached externally by a USB cord, all of which demonstrated strong performance.
7 | p In general, you can use any computer with the following specs
8 | ul
9 | li A 4-core arm64 or x86-based CPU
10 | li 16GB RAM
11 | li 256GB+ of storage for the metadata database, 10TB+ for the entire dataset
12 | p If you are using an Orange PI, pay attention to the power supply. We experienced issues when the HDD was attached through USB 3.0, but everything worked fine after switching to the left USB 2.0 port.
13 | h5 System Configuration
14 | h6 Burning the MicroSD Card
15 | p Start by obtaining an OS image suitable for your hardware. Orange PI images can be downloaded from their official site.
16 | p Burn the image file (usually *.iso or *.img) onto your MicroSD card. On MacOS and Linux, this can be done using the dd utility (set if and of to correspond to your file and disk respectively):
17 | pre
18 | code
19 | | sudo dd if=file.img of=/dev/sde status=progress conv=fsync
20 | h6 IPFS Configuration
21 | p Follow Steps 1 and 2 from the replication guide to set up IPFS.
22 | h5 Start Using It!
23 | p Install IPFS on any desktop in the same LAN, and then open STC.
24 |
25 |
35 |
--------------------------------------------------------------------------------
/library/user_manager/user_manager.py:
--------------------------------------------------------------------------------
1 | import time
2 |
3 |
class UserManager:
    """In-memory bookkeeping of per-user search bans and running tasks.

    All state lives in plain dictionaries/sets on the instance, so it is lost
    when the instance is discarded.
    """

    # Searches newer than this many seconds count towards a burst.
    BURST_WINDOW_SECONDS = 10
    # More than this many searches inside the burst window triggers a ban.
    BURST_SEARCH_LIMIT = 5
    BURST_BAN_SECONDS = 60
    # More than this many remembered searches triggers a longer ban.
    HISTORY_SEARCH_LIMIT = 20
    HISTORY_BAN_SECONDS = 120
    # Number of simultaneously registered tasks at which a user hits the limit.
    TASK_LIMIT = 3

    def __init__(self):
        self.search_times = {}      # user_id -> list of recorded search timestamps
        self.search_ban_times = {}  # user_id -> timestamp at which the ban expires
        self.tasks = set()          # (user_id, task id) pairs currently registered
        self.limits = {}            # user_id -> number of registered tasks (absent == 0)

    def add_search_time(self, user_id: str, search_time: float):
        """Record a search and ban the user when searches arrive too quickly.

        Walking the history from newest to oldest:
        * more than `BURST_SEARCH_LIMIT` searches inside the burst window ban
          the user for `BURST_BAN_SECONDS` and clear the history;
        * an entry older than the window that follows exactly one recent
          search clears the history without recording anything.
        A history longer than `HISTORY_SEARCH_LIMIT` bans the user for
        `HISTORY_BAN_SECONDS`.
        """
        current_time = time.time()
        search_times = self.search_times.get(user_id, [])
        search_times.append(search_time)
        recent_searches = 0

        for timestamp in reversed(search_times):
            if timestamp > current_time - self.BURST_WINDOW_SECONDS:
                recent_searches += 1
                if recent_searches > self.BURST_SEARCH_LIMIT:
                    self._ban(user_id, current_time + self.BURST_BAN_SECONDS)
                    return
            elif recent_searches == 1:
                # pop() instead of del: the user may have no stored history yet.
                self.search_times.pop(user_id, None)
                return

        if len(search_times) > self.HISTORY_SEARCH_LIMIT:
            self._ban(user_id, current_time + self.HISTORY_BAN_SECONDS)
            return

        self.search_times[user_id] = search_times

    def _ban(self, user_id, until):
        """Ban `user_id` until the `until` timestamp and forget the search history."""
        self.search_ban_times[user_id] = until
        self.search_times.pop(user_id, None)

    def check_search_ban_timeout(self, user_id: str):
        """Return the remaining ban time in whole seconds, or None when the user is not banned.

        An expired ban is removed as a side effect.
        """
        ban_time = self.search_ban_times.get(user_id)
        if ban_time:
            timeout = int(ban_time - time.time())
            if timeout > 0:
                return timeout
            self.search_ban_times.pop(user_id, None)
        return None

    def add_task(self, user_id, id):
        """Register task `id` for `user_id` and increase the user's task counter."""
        self.tasks.add((user_id, id))
        self.limits[user_id] = self.limits.get(user_id, 0) + 1

    def remove_task(self, user_id, id):
        """Unregister task `id` and decrease the user's task counter.

        Raises:
            KeyError: if the `(user_id, id)` pair was never registered.
        """
        self.tasks.remove((user_id, id))
        remaining = self.limits.get(user_id, 1) - 1
        if remaining > 0:
            self.limits[user_id] = remaining
        else:
            # Drop exhausted counters so the dict does not grow with every user ever seen.
            self.limits.pop(user_id, None)

    def has_task(self, user_id, id):
        """Return True when the `(user_id, id)` task is currently registered."""
        return (user_id, id) in self.tasks

    def hit_limits(self, user_id):
        """Return True when the user has `TASK_LIMIT` or more registered tasks."""
        return self.limits.get(user_id, 0) >= self.TASK_LIMIT
57 |
--------------------------------------------------------------------------------
/tgbot/promotions/promotions.yaml:
--------------------------------------------------------------------------------
1 | ---
2 |
3 | promotions:
4 | - texts:
5 | en: 💬 The victory of humanity is inevitable
6 | weight: 1
7 | - texts:
8 | en: 💬 Shall build Standard Template Construct
9 | weight: 1
10 | - texts:
11 | en: 💬 Gaining knowledge is the only purpose of life
12 | weight: 1
13 | - texts:
14 | en: 💬 Knowledge cannot belong
15 | weight: 1
16 | - texts:
17 | en: 💬 Obey the path of discovery
18 | weight: 1
19 | - texts:
20 | en: 💬 Research is the only and ultimate goal
21 | weight: 1
22 | - texts:
23 | en: 💬 Intellectual property is not a valid form of property
24 | weight: 1
25 | - texts:
26 | en: ⤴️ Stay tuned with us at @{related_channel}, [Twitter]({twitter_contact_url}) and [Reddit]({reddit_url})
27 | es: ⤴️ Mantente en contacto con nosotros en @{related_channel}, [Twitter]({twitter_contact_url}) y [Reddit]({reddit_url})
28 | it: ⤴️ Resta aggiornato con noi su @{related_channel}, [Twitter]({twitter_contact_url}) e [Reddit]({reddit_url})
29 | pb: ⤴️ Fique ligado conosco em @{related_channel}, [Twitter]({twitter_contact_url}) e [Reddit]({reddit_url})
30 | ru: ⤴️ Оставайся на связи с нами на @{related_channel}, [Twitter]({twitter_contact_url}) и в [Reddit]({reddit_url})
31 | weight: 5
32 | - texts:
33 | en: 🧬 Join [Nexus Communities](https://t.me/+fPQIvxQmJGQ3MzU8), the spaces to discuss science
34 | weight: 50
35 | - texts:
36 | en: 🔥 Join [our Reddit](https://www.reddit.com/r/science_nexus) to learn more about Nexus/STC
37 | weight: 50
38 | - texts:
39 | en: 🐦 Subscribe to our [Twitter](https://twitter.com/the_superpirate) to receive news first
40 | weight: 50
41 | - texts:
42 | en: ✉️ Subscribe to our [Telegram](https://t.me/nexus_search) to stay with us
43 | weight: 50
44 | - texts:
45 | en: ⤴️ Try [Standard Template Construct](https://libstc.cc) library
46 | ru: ⤴️ Заходи в библиотеку [Стандартных Шаблонных Конструкций](https://libstc.cc)
47 | weight: 5
48 |
--------------------------------------------------------------------------------
/web/src/views/DonateView.vue:
--------------------------------------------------------------------------------
1 |
2 | .container
3 | h3 {{ get_label("donate") }}
4 | p(v-html='get_label("donate_content")')
5 | .col-lg-6
6 | ul.nav.nav-tabs(id="currencies-tab" role="tablist")
7 | li.nav-item(role="presentation")
8 | button.nav-link.active(id="btc-tab" data-bs-toggle="tab" data-bs-target="#btc-tab-pane" type="button" role="tab" aria-controls="btc-tab-pane" aria-selected="true") BTC
9 | li.nav-item(role="presentation")
10 | button.nav-link(id="eth-tab" data-bs-toggle="tab" data-bs-target="#eth-tab-pane" type="button" role="tab" aria-controls="eth-tab-pane" aria-selected="false") ETH
11 | li.nav-item(role="presentation")
12 | button.nav-link(id="xmr-tab" data-bs-toggle="tab" data-bs-target="#xmr-tab-pane" type="button" role="tab" aria-controls="xmr-tab-pane" aria-selected="false") XMR
13 |
14 | .tab-content.mt-3(id="currencies-tab-content")
15 | .tab-pane.fade.show.active(id="btc-tab-pane" role="tabpanel" aria-labelledby="btc-tab")
16 | p
17 | code 357vJAFsYeCtLU36MYEgaDueg34rr5ajCy
18 | p
19 | img.favicon-inversion-filter.img-fluid(src="@/assets/btc.svg")
20 | .tab-pane.fade(id="eth-tab-pane" role="tabpanel" aria-labelledby="eth-tab")
21 | p
22 | code 0x199bECe965e4e1e2fE3065d3F551Ebe8520AC555
23 | p
24 | img.favicon-inversion-filter.img-fluid(src="@/assets/eth.svg")
25 | .tab-pane.fade(id="xmr-tab-pane" role="tabpanel" aria-labelledby="xmr-tab")
26 | p
27 | code 42HZx5Cg1uQ2CtCrq7QabP23BN7gBrGu6U6QumkMmR4bKS61gcoP8xyNzP5cJCbjac9yaWFhLsDmM3adMWyBKBXn1d9WiUb
28 | p
29 | img.favicon-inversion-filter.img-fluid(src="@/assets/xmr.svg")
30 |
31 |
43 |
--------------------------------------------------------------------------------
/web/vite.config.ts:
--------------------------------------------------------------------------------
1 | import { fileURLToPath, URL } from 'node:url'
2 |
3 | import react from '@vitejs/plugin-react'
4 | import vue from '@vitejs/plugin-vue'
5 | import { defineConfig } from 'vite'
6 | import topLevelAwait from 'vite-plugin-top-level-await'
7 | import wasm from 'vite-plugin-wasm'
8 | import vuePugPlugin from 'vue-pug-plugin'
9 |
10 | import summa_config from './summa-config.json'
11 |
12 | // https://vitejs.dev/config/
// Vite configuration for the web client: build inputs, plugins, worker
// settings, path aliases and dev-server proxies.
export default defineConfig({
  base: '',
  build: {
    rollupOptions: {
      input: {
        index: './index.html'
      },
      output: [
        {
          // NOTE(review): `name` is given a file-name pattern here; confirm this
          // is the intended Rollup output option.
          name: 'assets/[name].[hash].js'
        }
      ]
    },
    target: 'esnext'
  },
  plugins: [
    // NOTE(review): the React plugin is pointed at `.vue` files; confirm this
    // is intentional.
    react({
      include: '**/*.vue'
    }),
    vue({
      template: {
        preprocessOptions: {
          // 'preprocessOptions' is passed through to the pug compiler
          plugins: [vuePugPlugin]
        }
      }
    }),
    wasm(),
    topLevelAwait(),
  ],
  worker: {
    format: 'es',
    plugins: [wasm()]
  },
  resolve: {
    alias: {
      // '@' resolves to ./src and '~' to ./node_modules, relative to this file.
      '@': fileURLToPath(new URL('./src', import.meta.url)),
      '~': fileURLToPath(new URL('./node_modules', import.meta.url))
    },
    preserveSymlinks: true
  },
  server: {
    fs: {
      // Allow serving files from one level up to the project root
      allow: ['..']
    },
    proxy: {
      // Requests under /data go to the IPFS HTTP base URL from
      // summa-config.json; the /data prefix is stripped before forwarding.
      '^/data': {
        target: `${summa_config.ipfs_http_base_url}/ipns/standard-template-construct.org/data`,
        changeOrigin: true,
        secure: false,
        rewrite: (path) => path.replace(/^\/data/, ''),
      },
      // Requests under /images/wiki are forwarded the same way, with the
      // /images/wiki prefix stripped.
      '^/images/wiki': {
        target: `${summa_config.ipfs_http_base_url}/ipns/en.wikipedia-on-ipfs.org/I`,
        changeOrigin: true,
        secure: false,
        rewrite: (path) => path.replace(/^\/images\/wiki/, ''),
      }
    }
  }
})
--------------------------------------------------------------------------------
/tgbot/handlers/roll.py:
--------------------------------------------------------------------------------
1 | import re
2 | import time
3 |
4 | from telethon import events
5 |
6 | from library.telegram.base import RequestContext
7 | from tgbot.views.telegram.base_holder import BaseTelegramDocumentHolder
8 |
9 | from .base import BaseHandler
10 |
11 |
class RollHandler(BaseHandler):
    """Reply to `/roll [query]` with one document sampled from the search results."""
    filter = events.NewMessage(incoming=True, pattern=re.compile(r'^/roll(?:@\w+)?(.*)?$', re.DOTALL))
    is_group_handler = True

    async def handler(self, event: events.ChatAction, request_context: RequestContext):
        """Build a single-result reservoir-sampling query and show the sampled document.

        Nothing is sent when the search returns no documents.
        """
        started_at = time.time()

        request_context.add_default_fields(mode='roll', session_id=self.generate_session_id())
        raw_query = event.pattern_match.group(1).strip()

        query, query_traits = self.application.search_request_builder.process(
            raw_query,
            is_fieldnorms_scoring_enabled=False,
            collector='reservoir_sampling',
            limit=1,
            default_query_language=request_context.chat['language'],
        )
        documents = await self.application.summa_client.search_documents(query)
        if not documents:
            return

        document_holder = BaseTelegramDocumentHolder(documents[0])
        promotion = self.application.promotioner.choose_promotion(query_traits.query_language)

        # Document view followed by two line breaks and the promotion text.
        text = (
            document_holder.view_builder(query_traits.query_language)
            .add_view(bot_name=request_context.bot_name)
            .add_new_line(2)
            .add(promotion, escaped=True)
            .build()
        )

        keyboard = document_holder.buttons_builder(query_traits.query_language)
        if request_context.is_group_mode():
            keyboard.add_remote_download_button(bot_name=request_context.bot_name)
        else:
            keyboard.add_download_button()
        keyboard.add_close_button()

        elapsed = time.time() - started_at
        request_context.statbox(action='show', duration=elapsed)
        await event.respond(text, buttons=keyboard.build(), link_preview=True)
46 |
--------------------------------------------------------------------------------
/geck/stc_geck/utils.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | import logging
3 | import os
4 | import re
5 | import socket
6 | import tempfile
7 | from urllib.parse import quote
8 |
9 | import ipfs_hamt_directory_py
10 |
# Runs of characters that are neither whitespace nor word characters.
NON_ALNUMWHITESPACE_REGEX = re.compile(r'([^\s\w])+')
# Runs of whitespace.
MULTIWHITESPACE_REGEX = re.compile(r"\s+")


def cast_string_to_single_string(s):
    """Collapse `s` into a single dash-separated token.

    Punctuation runs become spaces, whitespace runs collapse to one space,
    the result is trimmed and the remaining spaces are replaced with dashes.
    """
    without_punctuation = NON_ALNUMWHITESPACE_REGEX.sub(' ', s)
    single_spaced = MULTIWHITESPACE_REGEX.sub(' ', without_punctuation)
    trimmed = single_spaced.strip()
    return trimmed.replace(' ', '-')
19 |
20 |
async def create_car(output_car, documents, limit, name_template) -> str:
    """Write a listing of up to `limit` documents and build a CAR file from it.

    One line per document is written to a temporary file as
    `<url-quoted name> <cid> <filesize>`, and the listing is then handed to
    `ipfs_hamt_directory_py.from_file` in the default executor.

    Args:
        output_car: passed through to `ipfs_hamt_directory_py.from_file`.
        documents: async iterable of dict-like documents; each must have a `cid`.
        limit: maximum number of documents to list.
        name_template: `str.format` template that may use `title`, `id`,
            `md5`, `doi` and `extension`.

    Returns:
        Whatever `ipfs_hamt_directory_py.from_file` returns.
    """
    with tempfile.TemporaryDirectory() as td:
        input_data = os.path.join(td, 'input_data.txt')
        with open(input_data, 'wb') as f:
            async for document in documents:
                if limit <= 0:
                    break
                id_ = document.get('doi') or document.get('md5')
                # Fall back to the id when the title is missing, None, empty,
                # or reduces to nothing after normalisation.
                title = cast_string_to_single_string(document.get('title') or '') or id_
                item_name = name_template.format(
                    title=title,
                    id=id_,
                    md5=document.get('md5'),
                    doi=document.get('doi'),
                    extension=document.get('metadata', {}).get('extension', 'pdf'),
                )
                f.write(quote(item_name, safe='').encode())
                f.write(b' ')
                f.write(document['cid'].encode())
                f.write(b' ')
                f.write(str(document.get('filesize') or 0).encode())
                f.write(b'\n')
                limit -= 1
        # The listing file is closed (and therefore flushed) before it is read.
        return await asyncio.get_running_loop().run_in_executor(
            None, lambda: ipfs_hamt_directory_py.from_file(input_data, output_car, td),
        )
46 |
47 |
def is_endpoint_listening(endpoint):
    """Return True when a TCP connection to `endpoint` (`"host:port"`) succeeds.

    Address resolution failures are logged to the `warning` logger and
    reported as False. The probing socket is always closed.

    Raises:
        ValueError: if `endpoint` is not of the form `host:port` or the port
            is not an integer.
    """
    # Parse before creating the socket so a malformed endpoint cannot leak it.
    ip, port = endpoint.split(':')
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
        try:
            return sock.connect_ex((ip, int(port))) == 0
        except socket.gaierror as e:
            logging.getLogger('warning').warning({'action': 'warning', 'error': str(e)})
            return False
58 |
--------------------------------------------------------------------------------
/web/src/views/DocumentView.vue:
--------------------------------------------------------------------------------
1 |
2 | .container
3 | loading-spinner(v-if="is_loading" style="margin-top: 140px" :label="get_label('loading_document') + '...'")
4 | connectivity-issues-view(v-else-if="is_loading_failed")
5 | div(v-else-if="not_found") Not found
6 | document(v-else-if="document" :document="document")
7 |
8 |
9 |
77 |
--------------------------------------------------------------------------------
/tgbot/handlers/q.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | import re
3 |
4 | from bs4 import BeautifulSoup
5 | from telethon import events
6 |
7 | from library.telegram.base import RequestContext
8 | from library.telegram.common import close_button
9 | from library.textutils.utils import remove_markdown
10 |
11 | from ..translations import t
12 | from .base import BaseHandler
13 | from ..views.telegram.common import encode_query_to_deep_link
14 |
15 |
class QHandler(BaseHandler):
    """Answer `/q <question>` with text chunks returned by semantic search."""
    filter = events.NewMessage(incoming=True, pattern=re.compile(r'^/q(?:@\w+)?(?:\s+(.*))?$', re.DOTALL))
    is_group_handler = True

    async def handler(self, event: events.ChatAction, request_context: RequestContext):
        """Run the semantic search and reply with up to three referenced chunks.

        A usage hint is sent instead when the command carries no query.
        """
        session_id = self.generate_session_id()
        request_context.add_default_fields(mode='cybrex', session_id=session_id)
        request_context.statbox(action='show', sender_id=event.sender_id)

        query = event.pattern_match.group(1)
        if not query:
            text = "Send query for semantic search after `/q`: `/q What is hemoglobin?`"
            return await event.reply(text)
        query = query.strip()

        scored_chunks = await self.application.cybrex_ai.semantic_search(query, n_chunks=3, n_documents=0)
        response = f'🤔 **{query}**'

        references = []
        for scored_chunk in scored_chunks[:3]:
            chunk = scored_chunk.chunk
            # maxsplit=1 always yields exactly two parts, so identifiers whose
            # value contains further colons no longer break the unpacking.
            field, value = chunk.document_id.split(':', 1)

            document_id = f'{field}:{value}'
            # Guard against a missing title before calling string methods on it.
            title = (chunk.title or '').replace('\n', ' - ')
            text_title = BeautifulSoup(title, 'lxml').get_text(separator='')
            deep_query = encode_query_to_deep_link(document_id, bot_name=request_context.bot_name)
            if deep_query:
                reference = f' - **{text_title}** - [{document_id}]({deep_query})'
            else:
                reference = f' - **{text_title}** - `{document_id}`'
            reference += f'\n**Text:** {remove_markdown(chunk.text)}'
            references.append(reference)

        references = '\n\n'.join(references)
        if references:
            response += f'\n\n**References:**\n\n{references}'
        return await event.reply(response, buttons=[close_button()])
53 |
--------------------------------------------------------------------------------
/tgbot/configs/logging.yaml:
--------------------------------------------------------------------------------
1 | ---
2 |
3 | logging:
4 | disable_existing_loggers: false
5 | formatters:
6 | base:
7 | class: izihawa_loglib.formatters.BaseFormatter
8 | default:
9 | class: izihawa_loglib.formatters.DefaultFormatter
10 | traceback:
11 | class: izihawa_loglib.formatters.TracebackFormatter
12 | handlers:
13 | console:
14 | class: logging.StreamHandler
15 | level: INFO
16 | stream: 'ext://sys.stderr'
17 | debug:
18 | class: izihawa_loglib.handlers.BaseFileHandler
19 | filename: '{{ log_path }}/debug.log'
20 | formatter: default
21 | level: DEBUG
22 | error:
23 | class: izihawa_loglib.handlers.BaseFileHandler
24 | filename: '{{ log_path }}/error.log'
25 | formatter: default
26 | level: ERROR
27 | operation:
28 | class: izihawa_loglib.handlers.BaseFileHandler
29 | filename: '{{ log_path }}/operation.log'
30 | formatter: base
31 | level: DEBUG
32 | statbox:
33 | class: izihawa_loglib.handlers.BaseFileHandler
34 | filename: '{{ log_path }}/statbox.log'
35 | formatter: default
36 | level: INFO
37 | traceback:
38 | class: izihawa_loglib.handlers.BaseFileHandler
39 | filename: '{{ log_path }}/traceback.log'
40 | formatter: traceback
41 | level: ERROR
42 | warning:
43 | class: izihawa_loglib.handlers.BaseFileHandler
44 | filename: '{{ log_path }}/warning.log'
45 | formatter: default
46 | level: WARNING
47 | loggers:
48 | aiobaseclient:
49 | handlers:
50 | - error
51 | - warning
52 | propagate: false
53 | chardet:
54 | handlers:
55 | - error
56 | propagate: false
57 | debug:
58 | handlers:
59 | - debug
60 | propagate: false
61 | error:
62 | handlers:
63 | - console
64 | - error
65 | - traceback
66 | - warning
67 | propagate: false
68 | operation:
69 | handlers:
70 | - operation
71 | propagate: false
72 | statbox:
73 | handlers:
74 | - console
75 | - statbox
76 | propagate: false
77 | telethon:
78 | handlers:
79 | - error
80 | - warning
81 | propagate: false
82 | root:
83 | handlers:
84 | - debug
85 | level: DEBUG
86 | version: 1
87 |
--------------------------------------------------------------------------------
/web/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "summa-web",
3 | "version": "0.0.0",
4 | "scripts": {
5 | "dev": "vite --mode development",
6 | "build": "run-p type-check build-only",
7 | "preview": "vite preview --port 4173",
8 | "build-only": "vite build --config vite.config.ts && vite build --config vite-sw.config.ts",
9 | "type-check": "vue-tsc --noEmit",
10 | "lint": "eslint . --ext .vue,.js,.jsx,.cjs,.mjs,.ts,.tsx,.cts,.mts --fix --ignore-path .gitignore",
11 | "publish": "bash publi.sh"
12 | },
13 | "dependencies": {
14 | "@grpc/grpc-js": "^1.9.7",
15 | "@protobuf-ts/grpcweb-transport": "^2.9.1",
16 | "@vueuse/core": "^10.2.1",
17 | "@vueuse/rxjs": "^10.2.1",
18 | "axios": "^1.4.0",
19 | "bootstrap": "^5.3.0",
20 | "comlink": "^4.4.1",
21 | "crypto-js": "^4.1.1",
22 | "detect-browser": "^5.3.0",
23 | "dexie": "^3.2.4",
24 | "epubjs": "^0.3.93",
25 | "google-protobuf": "^3.21.2",
26 | "grpc-web": "^1.4.2",
27 | "hammerjs": "^2.0.8",
28 | "npm-run-all": "^4.1.5",
29 | "pdfjs-dist": "^3.11.174",
30 | "qr-creator": "^1.0.0",
31 | "summa-wasm": "^0.135.7",
32 | "vite-plugin-require": "^1.1.11",
33 | "vite-plugin-top-level-await": "^1.3.1",
34 | "vite-plugin-wasm": "^3.2.2",
35 | "vue": "^3.3.4",
36 | "vue-router": "^4.2.4",
37 | "zingtouch": "^1.0.6"
38 | },
39 | "devDependencies": {
40 | "@protobuf-ts/plugin": "^2.9.1",
41 | "@rushstack/eslint-patch": "^1.3.2",
42 | "@tsconfig/node18": "^18.2.0",
43 | "@types/node": "^20.4.2",
44 | "@typescript-eslint/eslint-plugin": "^5.62.0",
45 | "@vitejs/plugin-react": "^4.0.3",
46 | "@vitejs/plugin-vue": "^4.2.3",
47 | "@vue/eslint-config-prettier": "^8.0.0",
48 | "@vue/eslint-config-typescript": "^11.0.3",
49 | "@vue/tsconfig": "^0.4.0",
50 | "bootstrap-icons": "^1.10.5",
51 | "djvujs-dist": "^0.5.4",
52 | "eslint": "^8.45.0",
53 | "eslint-config-airbnb-base": "^15.0.0",
54 | "eslint-config-standard-with-typescript": "^37.0.0",
55 | "eslint-plugin-import": "^2.27.5",
56 | "eslint-plugin-n": "^16.0.1",
57 | "eslint-plugin-promise": "^6.1.1",
58 | "eslint-plugin-simple-import-sort": "^10.0.0",
59 | "eslint-plugin-vue": "^9.15.1",
60 | "kubo-rpc-client": "^3.0.1",
61 | "prettier": "^3.0.0",
62 | "sass": "^1.64.0",
63 | "ts-node": "^10.9.1",
64 | "typescript": "^5.1.6",
65 | "vite": "^4.4.9",
66 | "vue-pug-plugin": "^2.0.3",
67 | "vue-tsc": "^1.8.5"
68 | }
69 | }
70 |
--------------------------------------------------------------------------------
/tgbot/markdownifytg.py:
--------------------------------------------------------------------------------
1 | import re
2 |
3 | from markdownify import (
4 | MarkdownConverter,
5 | abstract_inline_conversion, chomp,
6 | )
7 |
8 | html_heading_re = re.compile(r'(h[1-6]|header|title)')
9 |
10 |
class Converter(MarkdownConverter):
    """HTML to Markdown converter using `**` for bold and `__` for italics.

    Headings and titles are rendered as bold lines, horizontal rules are
    dropped, and formulas, tables and images are replaced by emoji placeholders.
    """
    convert_b = abstract_inline_conversion(lambda self: '**')
    convert_i = abstract_inline_conversion(lambda self: '__')
    convert_em = abstract_inline_conversion(lambda self: '__')

    def convert_header(self, el, text, convert_as_inline):
        """Render a `header` element as bold text on its own line."""
        return '\n' + super().convert_b(el, text, convert_as_inline) + '\n'

    def convert_hn(self, n, el, text, convert_as_inline):
        """Render any `h1`-`h6` heading as bold text on its own line."""
        return '\n' + super().convert_b(el, text, convert_as_inline) + '\n'

    def convert_hr(self, el, text, convert_as_inline):
        """Drop horizontal rules."""
        return ''

    def convert_title(self, el, text, convert_as_inline):
        """Render a `title` element as bold text followed by a line break."""
        return super().convert_b(el, text, convert_as_inline) + '\n'

    def convert_formula(self, el, text, convert_as_inline):
        """Replace a formula with a placeholder."""
        return '🔢\n'

    def convert_a(self, el, text, convert_as_inline):
        """Render an anchor as `[text](href)`.

        Anchors with no text are dropped; anchors without an `href` are
        rendered as their plain text instead of a link to `None`.
        """
        prefix, suffix, text = chomp(text)
        if not text:
            return ''
        href = el.get('href')
        if not href:
            return text
        return f'[{text}]({href})'

    def convert_img(self, el, text, convert_as_inline):
        """Replace an image with a placeholder."""
        return '🖼️\n'

    def convert_table(self, el, text, convert_as_inline):
        """Replace a table with a placeholder."""
        return '🔢\n'
43 |
44 |
class SnippetConverter(MarkdownConverter):
    """Converter for search snippets.

    `highlight` elements become bold, italics and headers lose their markup,
    headings and titles keep only their text, horizontal rules are dropped,
    and formulas, tables and images are replaced by emoji placeholders.
    """
    convert_highlight = abstract_inline_conversion(lambda self: '**')
    convert_i = abstract_inline_conversion(lambda self: '')
    convert_header = abstract_inline_conversion(lambda self: '')

    def convert_hn(self, n, el, text, convert_as_inline):
        """Keep only the text of `h1`-`h6` headings."""
        return text

    def convert_title(self, el, text, convert_as_inline):
        """Keep only the text of a `title` element."""
        return text

    def convert_hr(self, el, text, convert_as_inline):
        """Drop horizontal rules."""
        return ''

    def convert_img(self, el, text, convert_as_inline):
        """Replace an image with a placeholder."""
        return '🖼️\n'

    def convert_formula(self, el, text, convert_as_inline):
        """Replace a formula with a placeholder."""
        return '🔢\n'

    # Tables use the same placeholder as formulas.
    convert_table = convert_formula
67 |
68 |
# Shared converter instances; asterisks are left unescaped in their output.
md_converter = Converter(escape_asterisks=False)
highlight_md_converter = SnippetConverter(escape_asterisks=False)


def md(html, **options):
    """Convert `html` to Markdown using a fresh `Converter` built with `options`."""
    converter = Converter(**options)
    return converter.convert(html)
75 |
--------------------------------------------------------------------------------
/library/telegram/session_backend/core_postgres.py:
--------------------------------------------------------------------------------
1 | from typing import (
2 | Any,
3 | Union,
4 | )
5 |
6 | from sqlalchemy.dialects.postgresql import insert
7 | from telethon.sessions.memory import _SentFileType
8 | from telethon.tl.types import (
9 | InputDocument,
10 | InputPhoto,
11 | )
12 |
13 | from .core import AlchemyCoreSession
14 |
15 |
class AlchemyPostgresCoreSession(AlchemyCoreSession):
    """Session backend whose writes are PostgreSQL `INSERT ... ON CONFLICT DO UPDATE` upserts."""

    def set_update_state(self, entity_id: int, row: Any) -> None:
        """Upsert the update state (`pts`, `qts`, `date`, `seq`, `unread_count`) of an entity."""
        table = self.UpdateState.__table__
        state = {
            'pts': row.pts,
            'qts': row.qts,
            'date': row.date.timestamp(),
            'seq': row.seq,
            'unread_count': row.unread_count,
        }
        statement = (
            insert(table)
            .values(session_id=self.session_id, entity_id=entity_id, **state)
            .on_conflict_do_update(constraint=table.primary_key, set_=state)
        )
        with self.engine.begin() as conn:
            conn.execute(statement)

    def process_entities(self, tlo: Any) -> None:
        """Upsert every entity row extracted from `tlo`; does nothing when there are none."""
        rows = self._entities_to_rows(tlo)
        if not rows:
            return

        table = self.Entity.__table__
        insert_statement = insert(table)
        # On conflict, overwrite the mutable columns with the incoming values.
        refreshed_columns = {
            "hash": insert_statement.excluded.hash,
            "username": insert_statement.excluded.username,
            "phone": insert_statement.excluded.phone,
            "name": insert_statement.excluded.name,
        }
        upsert = insert_statement.on_conflict_do_update(constraint=table.primary_key, set_=refreshed_columns)
        parameters = [
            {
                'session_id': self.session_id,
                'id': row[0],
                'hash': row[1],
                'username': row[2],
                'phone': row[3],
                'name': row[4],
            }
            for row in rows
        ]
        with self.engine.begin() as conn:
            conn.execute(upsert, parameters)

    def cache_file(self, md5_digest: str, file_size: int,
                   instance: Union[InputDocument, InputPhoto]) -> None:
        """Upsert the id and access hash of a sent file, keyed by digest, size and type.

        Raises:
            TypeError: if `instance` is neither an `InputDocument` nor an `InputPhoto`.
        """
        if not isinstance(instance, (InputDocument, InputPhoto)):
            raise TypeError(f"Cannot cache {type(instance)} instance")

        table = self.SentFile.__table__
        file_reference = {'id': instance.id, 'hash': instance.access_hash}
        statement = (
            insert(table)
            .values(
                session_id=self.session_id,
                md5_digest=md5_digest,
                type=_SentFileType.from_type(type(instance)).value,
                file_size=file_size,
                **file_reference,
            )
            .on_conflict_do_update(constraint=table.primary_key, set_=file_reference)
        )
        with self.engine.begin() as conn:
            conn.execute(statement)
57 |
--------------------------------------------------------------------------------
/tgbot/handlers/view.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | import time
3 |
4 | from telethon import (
5 | events,
6 | functions,
7 | )
8 | from telethon.errors import MessageIdInvalidError
9 |
10 | from library.telegram.base import RequestContext
11 | from tgbot.translations import t
12 | from tgbot.views.telegram.base_holder import BaseTelegramDocumentHolder
13 |
14 | from .base import BaseHandler
15 |
16 |
def is_earlier_than_2_days(message):
    """Return True when `message` was sent less than two days (minus 10 seconds) ago.

    Messages without a date yield False.
    """
    if not message.date:
        return False
    # datetime.timestamp() honours the tzinfo of aware datetimes, whereas
    # mktime(timetuple()) would reinterpret the same wall-clock fields as local
    # time and skew the age by the host's UTC offset.
    age_seconds = time.time() - message.date.timestamp()
    return age_seconds < 2 * 24 * 60 * 60 - 10
20 |
21 |
class ViewHandler(BaseHandler):
    """Show the document referenced by a `/v_<cid>` message."""
    filter = events.NewMessage(incoming=True, pattern='^/v_([A-Za-z0-9_-]+)')

    def parse_pattern(self, event: events.ChatAction):
        """Return the CID captured by the filter pattern."""
        cid = event.pattern_match.group(1)
        return cid

    async def get_message(self, message_id, request_context: RequestContext):
        """Fetch a single message by id through the bot's Telegram client."""
        get_message_request = functions.messages.GetMessagesRequest(id=[message_id])
        messages = await self.application.get_telegram_client(request_context.bot_name)(get_message_request)
        return messages.messages[0]

    async def handler(self, event: events.ChatAction, request_context: RequestContext):
        """Reply with a placeholder, then turn it into the document view.

        When no document matches the CID the placeholder is edited into the
        "outdated link" notice, so no stale "searching" message is left behind.
        """
        cid = self.parse_pattern(event)

        request_context.add_default_fields(mode='view', cid=cid)
        request_context.statbox(action='view')

        language = request_context.chat['language']

        try:
            prefetch_message = await event.reply(t("SEARCHING", language))
            document = await self.application.summa_client.get_one_by_field_value('nexus_science', 'links.cid', cid)
            if not document:
                return await prefetch_message.edit(t("OUTDATED_VIEW_LINK", language))
            holder = BaseTelegramDocumentHolder(document)
            promo = self.application.promotioner.choose_promotion(language)
            view_builder = holder.view_builder(language).add_view(bot_name=request_context.bot_name).add_new_line(2).add(promo, escaped=True)
            buttons = holder.buttons_builder(language).add_default_layout(
                bot_name=request_context.bot_name,
                is_group_mode=request_context.is_group_mode(),
            ).build()
            return await asyncio.gather(
                event.delete(),
                prefetch_message.edit(view_builder.build(), buttons=buttons, link_preview=holder.has_cover()),
            )
        except MessageIdInvalidError:
            return await event.reply(t("VIEWS_CANNOT_BE_SHARED", language))
60 |
--------------------------------------------------------------------------------
/tgbot/handlers/start.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 |
3 | from telethon import events
4 |
5 | from library.telegram.base import RequestContext
6 | from tgbot.translations import t
7 | from tgbot.views.telegram.common import (
8 | DecodeDeepQueryError,
9 | decode_deep_query, recode_base64_to_base36,
10 | )
11 |
12 | from .search import BaseSearchHandler
13 |
14 |
class StartHandler(BaseSearchHandler):
    """Handle ``/start`` messages, with or without an attached payload.

    The payload (everything after ``/start``) is first decoded as a deep
    query; if that fails it is recoded from base64 to base36 and used as a
    ``links.cid`` query. When a query is obtained, it is sent to the chat
    and answered with a search widget; otherwise the help text is shown.
    """

    filter = events.NewMessage(incoming=True, pattern='^/start\\s?(.*)?')

    async def handler(self, event: events.ChatAction, request_context: RequestContext):
        """Process one ``/start`` event.

        Args:
            event: Incoming message event matching ``filter``.
            request_context: Context of the current request (chat data,
                bot name, statistics and error logging).
        """
        payload = event.pattern_match.group(1)

        request_context.statbox(action='start', mode='start', text=event.text)

        string_query = None
        try:
            string_query = decode_deep_query(payload)
        except DecodeDeepQueryError as deep_query_error:
            try:
                string_query = f'links.cid:{recode_base64_to_base36(payload)}'
            except DecodeDeepQueryError as cid_error:
                # Both interpretations failed: report them in order.
                for failure in (deep_query_error, cid_error):
                    request_context.error_log(failure, mode='start', raw_query=payload)

        if not string_query:
            request_context.statbox(action='show', mode='start')
            await event.reply(t('HELP', request_context.chat['language']))
            return

        request_context.statbox(action='query', mode='start', query=string_query)
        request_message = await self.application.get_telegram_client(
            request_context.bot_name,
        ).send_message(event.chat, string_query)
        prefetch_message = await request_message.reply(
            t("SEARCHING", request_context.chat['language']),
        )
        try:
            text, buttons, link_preview = await self.setup_widget(
                request_context=request_context,
                string_query=string_query,
                is_shortpath_enabled=True,
            )
            # Delete the original command while the placeholder is filled in.
            await asyncio.gather(
                event.delete(),
                self.application.get_telegram_client(request_context.bot_name).edit_message(
                    request_context.chat['chat_id'],
                    prefetch_message.id,
                    text,
                    buttons=buttons,
                    link_preview=link_preview,
                ),
            )
        except Exception:
            # Do not leave the placeholder behind when rendering fails.
            await prefetch_message.delete()
            raise
63 |
--------------------------------------------------------------------------------
/web/src/views/Reader.vue:
--------------------------------------------------------------------------------
1 |
//- Exactly one of three states is rendered: an error notice, a download
//- spinner, or the reader chosen by the file name suffix.
.container(v-if="error !== undefined")
  .row
    .col-md-8.offset-md-2
      connectivity-issues-view(:reason="error")
.container.col-md-8.offset-md-2(v-else-if="downloading_status !== undefined")
  loading-spinner(
    style="margin-top: 140px",
    :label="downloading_status"
  )
div(v-else-if="data !== undefined")
  //- Every reader gets the same anchor/data props and reports anchor changes.
  epub-reader.inversion-filter(
    v-if="filename.endsWith('epub')",
    :anchor="anchor",
    :data="data",
    @update-anchor="update_anchor"
  )
  djvu-reader(
    v-else-if="filename.endsWith('djvu')",
    :anchor="anchor",
    :data="data",
    @update-anchor="update_anchor"
  )
  pdf-reader(
    v-else-if="filename.endsWith('pdf')",
    :anchor="anchor",
    :data="data",
    @update-anchor="update_anchor"
  )
12 |
13 |
14 |
75 |
--------------------------------------------------------------------------------
/cybrex/examples/on-the-fly-translation.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {
7 | "collapsed": true
8 | },
9 | "outputs": [],
10 | "source": [
11 | "from transformers import MBartForConditionalGeneration, MBart50TokenizerFast\n",
12 | "\n",
13 | "model = MBartForConditionalGeneration.from_pretrained(\"facebook/mbart-large-50-many-to-many-mmt\")\n",
14 | "tokenizer = MBart50TokenizerFast.from_pretrained(\"facebook/mbart-large-50-many-to-many-mmt\")"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": null,
20 | "outputs": [],
21 | "source": [
22 | "tokenizer.lang_code_to_id"
23 | ],
24 | "metadata": {
25 | "collapsed": false
26 | }
27 | },
28 | {
29 | "cell_type": "code",
30 | "execution_count": null,
31 | "outputs": [],
32 | "source": [
33 | "article = \"Forty-two patients operated on by skin expansion have been contacted after a mean time of 25 months from the last surgery. Two biopsies have been taken from the expanded area of each patient. In 12 patients it has been possible to obtain a similar sampling from the opposite, nonexpanded area of the body. The samples underwent optic microscopy and cell kinetic and DNA content investigations. The epidermal structure of the followed-up skin, compared with the skin of the opposite side of the body, looks normal. The mitotic activity of the epidermal cells has returned to the values of preexpanded skin. The dermis shows a low degree of elastosis and zonal fragmentation of elastic fibers. The hypodermis, where the expander capsule was removed during the last surgery, does not show an accentuated fibrosis.\"\n",
34 | "tokenizer.src_lang = \"en_XX\"\n",
35 | "inputs = tokenizer(article, return_tensors=\"pt\")\n",
36 | "\n",
37 | "translated_tokens = model.generate(**inputs, forced_bos_token_id=tokenizer.lang_code_to_id[\"ru_RU\"], max_length=1024)\n",
38 | "tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]"
39 | ],
40 | "metadata": {
41 | "collapsed": false
42 | }
43 | },
44 | {
45 | "cell_type": "code",
46 | "execution_count": null,
47 | "outputs": [],
48 | "source": [],
49 | "metadata": {
50 | "collapsed": false
51 | }
52 | }
53 | ],
54 | "metadata": {
55 | "kernelspec": {
56 | "display_name": "Python 3",
57 | "language": "python",
58 | "name": "python3"
59 | },
60 | "language_info": {
61 | "codemirror_mode": {
62 | "name": "ipython",
63 | "version": 2
64 | },
65 | "file_extension": ".py",
66 | "mimetype": "text/x-python",
67 | "name": "python",
68 | "nbconvert_exporter": "python",
69 | "pygments_lexer": "ipython2",
70 | "version": "2.7.6"
71 | }
72 | },
73 | "nbformat": 4,
74 | "nbformat_minor": 0
75 | }
76 |
--------------------------------------------------------------------------------
/web/src/database.ts:
--------------------------------------------------------------------------------
1 | import Dexie from 'dexie'
2 |
3 | import { average } from '@/utils'
4 |
/**
 * Dexie database holding the user's bookmarks and recent search timings.
 *
 * Tables:
 * - `bookmarks`: compound primary key `[index_name+query]`, indexed by `created_at`.
 * - `search_metrics`: primary key `created_at`.
 */
export class UserDb extends Dexie {
  bookmarks!: Dexie.Table
  search_metrics!: Dexie.Table

  /**
   * @param name - database name passed to Dexie
   * @param version - schema version the stores are declared for
   */
  constructor (name: string, version: number) {
    super(name)
    this.version(version).stores({
      bookmarks: '[index_name+query],created_at',
      search_metrics: 'created_at'
    })
    this.bookmarks.mapToClass(Bookmark)
    this.search_metrics.mapToClass(SearchMetric)
  }

  /**
   * Store a search metric, trimming the table to the most recent entries first.
   *
   * @param search_metrics - metric to store
   * @returns the key of the stored metric
   */
  async add_search_metrics (search_metrics: SearchMetric) {
    const max_kept_metrics = 100
    return await this.transaction('rw', this.search_metrics, async () => {
      // Walk newest-first so the rows skipped by `offset` are the recent ones
      // and only older rows are deleted. Without the explicit descending order
      // the oldest rows were kept and fresh metrics were thrown away.
      await this.search_metrics
        .orderBy('created_at')
        .reverse()
        .offset(max_kept_metrics)
        .delete()
      return await this.search_metrics.put(search_metrics)
    })
  }

  /**
   * Average the `spent` value over the most recent metrics.
   *
   * @param last_n_time - number of most recent metrics to average
   * @returns the average, or `undefined` when fewer than `last_n_time`
   *   metrics are stored
   */
  async get_average_spent (last_n_time: number) {
    // Read-only: no write lock is needed.
    return await this.transaction('r', this.search_metrics, async () => {
      const result = await this.search_metrics
        .orderBy('created_at')
        .reverse()
        .limit(last_n_time)
        .toArray()
      if (result.length < last_n_time) {
        return undefined
      }
      return average(result.map((x) => x.spent))
    })
  }

  /**
   * Insert a bookmark, replacing any existing one with the same key.
   *
   * @param bookmark - bookmark to store
   * @returns the key of the stored bookmark
   */
  async add_bookmark (bookmark: IBookmark) {
    return await this.transaction('rw', this.bookmarks, async () => {
      return await this.bookmarks.put(bookmark)
    })
  }

  /**
   * @returns all bookmarks, newest first by `created_at`
   */
  async get_all_bookmarks () {
    return await this.transaction('r', this.bookmarks, async () => {
      return await this.bookmarks.orderBy('created_at').reverse().toArray()
    })
  }

  /**
   * @param index_name - first part of the bookmark key
   * @param query - second part of the bookmark key
   * @returns whether a bookmark with this key exists
   */
  async has_bookmark (index_name: string, query: string) {
    return await this.transaction('r', this.bookmarks, async () => {
      return (await this.bookmarks.get([index_name, query])) !== undefined
    })
  }

  /**
   * Delete the bookmark with the given key, if present.
   *
   * @param index_name - first part of the bookmark key
   * @param query - second part of the bookmark key
   */
  async delete_bookmark (index_name: string, query: string) {
    await this.transaction('rw', this.bookmarks, async () => {
      await this.bookmarks.delete([index_name, query])
    })
  }
}
64 |
/** Shape of a stored bookmark record. */
interface IBookmark {
  /** Name of the index the bookmarked query belongs to. */
  index_name: string
  /** The bookmarked query string. */
  query: string
  /** Creation time in seconds (milliseconds since epoch divided by 1000). */
  created_at: number
}

/** Bookmark record stamped with its creation time on construction. */
export class Bookmark implements IBookmark {
  index_name: string
  query: string
  created_at: number

  /**
   * @param index_name - name of the index the query belongs to
   * @param query - the query string to bookmark
   */
  constructor (index_name: string, query: string) {
    const now_ms = Date.now()
    this.index_name = index_name
    this.query = query
    this.created_at = now_ms / 1000
  }
}
82 |
/** Shape of a stored search timing record. */
interface ISearchMetric {
  /** Measured value recorded for one search. */
  spent: number
  /** Creation time in seconds (milliseconds since epoch divided by 1000). */
  created_at: number
}

/** Search timing record stamped with its creation time on construction. */
export class SearchMetric implements ISearchMetric {
  spent: number
  created_at: number

  /**
   * @param spent - measured value to record
   */
  constructor (spent: number) {
    const now_ms = Date.now()
    this.spent = spent
    this.created_at = now_ms / 1000
  }
}
97 |
// Module-wide database instance shared by importers of this file.
const USER_DB_NAME = 'UserDb'
const USER_DB_SCHEMA_VERSION = 8
export const user_db = new UserDb(USER_DB_NAME, USER_DB_SCHEMA_VERSION)
99 |
--------------------------------------------------------------------------------
/web/public/safari-pinned-tab.svg:
--------------------------------------------------------------------------------
1 |
2 |
4 |
46 |
--------------------------------------------------------------------------------
/web/src/router/index.ts:
--------------------------------------------------------------------------------
1 | // @ts-nocheck
2 | import { createRouter, createWebHashHistory } from 'vue-router'
3 |
// Application router. Uses hash-based history and lazy-loads every view
// through a dynamic import.
const router = createRouter({
  history: createWebHashHistory(import.meta.env.BASE_URL),
  scrollBehavior (to, from, savedPosition) {
    // Moving between help pages: scroll smoothly to the `#hrv` element
    // instead of jumping to the top.
    if (to.path.startsWith('/help') && from.path.startsWith('/help')) {
      return {
        el: '#hrv',
        behavior: 'smooth',
      }
    }
    // always scroll to top
    return { top: 0 }
  },
  routes: [
    {
      path: '/',
      name: 'search',
      component: () => import('../views/SearchView.vue'),
      props: (route) => ({
        q: route.query.q,
        // Explicit radix: page numbers are always decimal. The result is
        // NaN when `p` is absent or not numeric.
        p: Number.parseInt(route.query.p, 10),
        t: route.query.t,
        y: route.query.y
      })
    },
    {
      path: '/reader',
      name: 'reader',
      component: () => import('../views/Reader.vue'),
      props: (route) => ({
        cid: route.query.cid,
        filename: route.query.filename,
        anchor: route.query.anchor
      })
    },
    {
      path: '/bookmarks',
      name: 'bookmarks',
      component: () => import('../views/BookmarksView.vue')
    },
    {
      path: '/help',
      name: 'help',
      component: () => import('../views/HelpView.vue'),
      children: [
        {
          path: '',
          name: 'intro',
          component: () => import('../views/IntroView.vue')
        },
        {
          path: 'doomsday',
          name: 'doomsday',
          component: () => import('../views/DoomsdayView.vue')
        },
        {
          path: 'donate',
          name: 'donate',
          component: () => import('../views/DonateView.vue')
        },
        {
          path: 'how-to-search',
          name: 'how-to-search',
          component: () => import('../views/HowToSearchView.vue')
        },
        {
          path: 'install-ipfs',
          name: 'install-ipfs',
          component: () => import('../views/InstallIpfsView.vue')
        },
        {
          path: 'replicate',
          name: 'replicate',
          component: () => import('../views/ReplicateView.vue')
        },
        {
          path: 'stc-box',
          name: 'stc-box',
          component: () => import('../views/StcBoxView.vue')
        },
        {
          path: 'stc-hub-api',
          name: 'stc-hub-api',
          component: () => import('../views/StcHubApiView.vue')
        }
      ]
    },
    {
      // `:id(.+)` lets the id contain slashes.
      path: '/nexus_science/:id(.+)',
      name: 'document',
      component: () => import('../views/DocumentView.vue'),
      props: true
    }
  ]
})
98 |
99 | export default router
100 |
--------------------------------------------------------------------------------
/web/src/services/search/search-service.ts:
--------------------------------------------------------------------------------
1 | // @ts-nocheck
2 |
3 | import {
4 | type IndexQuery,
5 | } from 'summa-wasm'
6 | import {
7 | IpfsSearchProvider,
8 | RemoteSearchProvider,
9 | type SearchProvider, SearchProviderStatus,
10 | } from "@/services/search/search-provider";
11 | import {ref} from "vue";
12 | import {utils} from "summa-wasm";
13 |
14 | export class SearchService {
15 | search_providers: Array;
16 | current_provider_ix: Number;
17 | init_guard: Promise;
18 | current_init_status: any;
19 | loading_failure_reason: any;
20 |
21 |
/**
 * Build the ordered list of search providers and start their setup.
 *
 * Order: an optional "Local API" remote provider (only when the IPFS
 * hostname is not one of the excluded hosts below), then the
 * "Nebula Nomad Station" remote provider, then the IPFS provider.
 *
 * @param logging_level - forwarded to the IPFS search provider options
 */
constructor(logging_level: string) {
  this.current_init_status = ref(undefined);

  const { ipfs_hostname, ipfs_protocol } = utils.get_ipfs_hostname();
  // Drop the port part, if any.
  const [bare_hostname] = ipfs_hostname.split(':');
  const hosts_without_local_api = ['localhost', 'ipfs.io', 'dweb.link'];

  const providers = [];
  if (!hosts_without_local_api.includes(bare_hostname)) {
    const local_api_url = `${ipfs_protocol}//api.${bare_hostname}`;
    providers.push(new RemoteSearchProvider(local_api_url, "Local API"));
  }
  providers.push(
    new RemoteSearchProvider("https://api.libstc.cc", "Nebula Nomad Station"),
    new IpfsSearchProvider(this.current_init_status, {logging_level}),
  );

  this.search_providers = providers;
  this.current_provider_ix = ref(undefined);
  this.loading_failure_reason = ref(undefined);

  // Kick off setup right away; `init_guard` settles when it finishes.
  const run_setup = async () => {
    await this.setup();
  };
  this.init_guard = run_setup();
}
51 |
/**
 * Try the providers in order and select the first one that sets up
 * successfully.
 *
 * A provider whose `setup` throws is skipped and its error remembered.
 * If no provider ends up selected and at least one error was seen, the
 * last error's string form is stored in `loading_failure_reason`.
 */
async setup() {
  const providers = this.search_providers;
  let most_recent_failure = undefined;

  for (let position = 0; position < providers.length; position += 1) {
    const candidate = providers[position];
    let threw = false;
    try {
      await candidate.setup(this.current_init_status);
    } catch (e) {
      most_recent_failure = e;
      threw = true;
    }
    if (!threw && candidate.status.value == SearchProviderStatus.Succeeded) {
      this.current_provider_ix.value = position;
      return;
    }
  }

  if (most_recent_failure === undefined) {
    return;
  }
  this.loading_failure_reason.value = most_recent_failure.toString();
}
70 |
/**
 * Switch to the provider at `index` if it is (or can be made) usable.
 *
 * A provider that was never set up is set up now; any other provider is
 * health-checked. The current provider changes only when the chosen
 * provider's status is `Succeeded` afterwards.
 *
 * @param index - position of the provider in `search_providers`
 * @throws RangeError when no provider exists at `index`
 */
async change_provider(index: Number) {
  const new_provider = this.search_providers[index];
  if (new_provider === undefined) {
    // Fail with a clear message instead of a TypeError on `undefined.status`.
    throw new RangeError(`Unknown search provider index: ${index}`);
  }
  if (new_provider.status.value == SearchProviderStatus.NotSetup) {
    // Pass the shared init-status ref, the same way `setup()` of this class
    // invokes provider setup.
    await new_provider.setup(this.current_init_status);
  } else {
    await new_provider.healthcheck();
  }
  if (new_provider.status.value == SearchProviderStatus.Succeeded) {
    this.current_provider_ix.value = index;
  }
}
82 |
83 | async search(index_query: IndexQuery, options: QueryOptions): Promise