├── src ├── controllers │ ├── app.controller.ts │ ├── metrics │ │ ├── metrics.controller.ts │ │ └── metrics.controller.spec.ts │ └── render-controller │ │ ├── render.controller.ts │ │ └── render.controller.spec.ts ├── LogLevels.ts ├── models │ └── LeakedRequests.ts ├── main.ts ├── app.module.ts ├── middleware │ └── RequestLoggerMiddleware.ts ├── services │ ├── json-logger.service.ts │ └── prerender.service.ts └── config.ts ├── test ├── setup-env.ts ├── jest-e2e.json └── app.e2e-spec.ts ├── .prettierrc ├── tsconfig.build.json ├── nest-cli.json ├── .editorconfig ├── Dockerfile ├── tsconfig.json ├── .gitignore ├── eslint.config.mjs ├── LICENSE ├── .github └── workflows │ └── docker-publish.yml ├── CHANGELOG.md ├── package.json ├── cliff.toml ├── CODE_OF_CONDUCT.md └── README.md /src/controllers/app.controller.ts: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/setup-env.ts: -------------------------------------------------------------------------------- 1 | process.env.BOTVIEW_LOG_LEVEL = "FATAL"; 2 | -------------------------------------------------------------------------------- /.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "singleQuote": false, 3 | "trailingComma": "all", 4 | "tabWidth": 4 5 | } -------------------------------------------------------------------------------- /tsconfig.build.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "./tsconfig.json", 3 | "exclude": ["node_modules", "test", "dist", "**/*spec.ts"] 4 | } 5 | -------------------------------------------------------------------------------- /src/LogLevels.ts: -------------------------------------------------------------------------------- 1 | export enum LogLevels { 2 | FATAL = 60, 3 | ERROR = 50, 4 | WARN = 40, 5 | INFO = 30, 6 | DEBUG = 20, 7 | TRACE = 10, 8 
| } 9 | -------------------------------------------------------------------------------- /src/models/LeakedRequests.ts: -------------------------------------------------------------------------------- 1 | export class LeakedRequests { 2 | public url = ""; 3 | public startTime = 0; 4 | public endTime = 0; 5 | public time = 0; 6 | } 7 | -------------------------------------------------------------------------------- /nest-cli.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://json.schemastore.org/nest-cli", 3 | "collection": "@nestjs/schematics", 4 | "sourceRoot": "src", 5 | "compilerOptions": { 6 | "deleteOutDir": true 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /test/jest-e2e.json: -------------------------------------------------------------------------------- 1 | { 2 | "moduleFileExtensions": ["js", "json", "ts"], 3 | "rootDir": ".", 4 | "testEnvironment": "node", 5 | "testRegex": ".e2e-spec.ts$", 6 | "transform": { 7 | "^.+\\.(t|j)s$": "ts-jest" 8 | }, 9 | "setupFiles": ["/setup-env.ts"] 10 | } 11 | -------------------------------------------------------------------------------- /src/main.ts: -------------------------------------------------------------------------------- 1 | import { NestFactory } from "@nestjs/core"; 2 | import { AppModule } from "./app.module"; 3 | import { JsonLogger } from "./services/json-logger.service"; 4 | 5 | // init app 6 | async function bootstrap() { 7 | const app = await NestFactory.create(AppModule, { 8 | logger: new JsonLogger(), 9 | }); 10 | await app.listen(3000); 11 | } 12 | bootstrap(); 13 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | [*] 2 | end_of_line = lf 3 | charset = utf-8 4 | 5 | # Code files 6 | [*.cs,*.csx,*.js,*.jsx,*.ts,*,tsx,*.css,*.scss] 7 | 
insert_final_newline = true 8 | indent_style = space 9 | indent_size = 4 10 | trim_trailing_whitespace = true 11 | max_line_length = 140 12 | quote_type = double 13 | curly_bracket_next_line = true 14 | spaces_around_operators = true 15 | spaces_around_brackets = true 16 | indent_brace_style = Allman 17 | continuation_indent_size = 4 18 | 19 | [*.xml] 20 | indent_style = space 21 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM mcr.microsoft.com/playwright:v1.54.1-noble AS development 2 | 3 | WORKDIR /app 4 | COPY package*.json tsconfig*.json nest-cli.json eslint.config.mjs ./ 5 | RUN npm ci 6 | 7 | COPY ./src ./src 8 | COPY ./test ./test 9 | 10 | RUN npm run test 11 | RUN npm run test:e2e 12 | RUN npm run build 13 | 14 | 15 | FROM mcr.microsoft.com/playwright:v1.54.1-noble as production 16 | 17 | ARG NODE_ENV=production 18 | ENV NODE_ENV=${NODE_ENV} 19 | 20 | WORKDIR /app 21 | COPY package*.json ./ 22 | RUN npm ci --omit=dev 23 | 24 | COPY --from=development /app/dist ./dist 25 | 26 | CMD ["node", "dist/main"] -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "module": "commonjs", 4 | "declaration": true, 5 | "removeComments": true, 6 | "emitDecoratorMetadata": true, 7 | "experimentalDecorators": true, 8 | "allowSyntheticDefaultImports": true, 9 | "target": "ES2023", 10 | "sourceMap": true, 11 | "outDir": "./dist", 12 | "baseUrl": "./", 13 | "incremental": true, 14 | "skipLibCheck": true, 15 | "strictNullChecks": true, 16 | "forceConsistentCasingInFileNames": true, 17 | "noImplicitAny": false, 18 | "strictBindCallApply": false, 19 | "noFallthroughCasesInSwitch": false 20 | } 21 | } 22 | -------------------------------------------------------------------------------- 
/src/controllers/metrics/metrics.controller.ts: -------------------------------------------------------------------------------- 1 | import { Controller, Response, Get } from "@nestjs/common"; 2 | import { Response as EResponse } from "express"; 3 | import { collectDefaultMetrics, Registry } from "prom-client"; 4 | 5 | const register = new Registry(); 6 | collectDefaultMetrics({ 7 | register: register, 8 | eventLoopMonitoringPrecision: 100, 9 | }); 10 | 11 | @Controller("metrics") 12 | export class MetricsController { 13 | @Get() 14 | public async getMetrics(@Response() response: EResponse) { 15 | return response 16 | .set("Content-Type", register.contentType) 17 | .send(await register.metrics()); 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /src/app.module.ts: -------------------------------------------------------------------------------- 1 | import { Module } from "@nestjs/common"; 2 | import { RenderController } from "./controllers/render-controller/render.controller"; 3 | import { NestModule, MiddlewareConsumer } from "@nestjs/common"; 4 | import { RequestLoggerMiddleware } from "./middleware/RequestLoggerMiddleware"; 5 | import { MetricsController } from "./controllers/metrics/metrics.controller"; 6 | import { JsonLogger } from "./services/json-logger.service"; 7 | import { PrerenderService } from "./services/prerender.service"; 8 | 9 | @Module({ 10 | controllers: [RenderController, MetricsController], 11 | providers: [JsonLogger, PrerenderService], 12 | }) 13 | export class AppModule implements NestModule { 14 | configure(consumer: MiddlewareConsumer): void { 15 | consumer.apply(RequestLoggerMiddleware).forRoutes("*"); 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # compiled output 2 | /dist 3 | /node_modules 4 | /build 5 | 6 | # Logs 7 | logs 8 | *.log 9 
| npm-debug.log* 10 | pnpm-debug.log* 11 | yarn-debug.log* 12 | yarn-error.log* 13 | lerna-debug.log* 14 | 15 | # OS 16 | .DS_Store 17 | 18 | # Tests 19 | /coverage 20 | /.nyc_output 21 | 22 | # IDEs and editors 23 | /.idea 24 | .project 25 | .classpath 26 | .c9/ 27 | *.launch 28 | .settings/ 29 | *.sublime-workspace 30 | 31 | # IDE - VSCode 32 | .vscode/* 33 | !.vscode/settings.json 34 | !.vscode/tasks.json 35 | !.vscode/launch.json 36 | !.vscode/extensions.json 37 | 38 | # dotenv environment variable files 39 | .env 40 | .env.development.local 41 | .env.test.local 42 | .env.production.local 43 | .env.local 44 | 45 | # temp directory 46 | .temp 47 | .tmp 48 | 49 | # Runtime data 50 | pids 51 | *.pid 52 | *.seed 53 | *.pid.lock 54 | 55 | # Diagnostic reports (https://nodejs.org/api/report.html) 56 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json 57 | -------------------------------------------------------------------------------- /eslint.config.mjs: -------------------------------------------------------------------------------- 1 | // @ts-check 2 | import eslint from '@eslint/js'; 3 | import eslintPluginPrettierRecommended from 'eslint-plugin-prettier/recommended'; 4 | import globals from 'globals'; 5 | import tseslint from 'typescript-eslint'; 6 | 7 | export default tseslint.config( 8 | { 9 | ignores: ['eslint.config.mjs'], 10 | }, 11 | eslint.configs.recommended, 12 | ...tseslint.configs.recommendedTypeChecked, 13 | eslintPluginPrettierRecommended, 14 | { 15 | languageOptions: { 16 | globals: { 17 | ...globals.node, 18 | ...globals.jest, 19 | }, 20 | sourceType: 'commonjs', 21 | parserOptions: { 22 | projectService: true, 23 | tsconfigRootDir: import.meta.dirname, 24 | }, 25 | }, 26 | }, 27 | { 28 | rules: { 29 | '@typescript-eslint/no-explicit-any': 'off', 30 | '@typescript-eslint/no-floating-promises': 'warn', 31 | '@typescript-eslint/no-unsafe-argument': 'warn' 32 | }, 33 | }, 34 | ); -------------------------------------------------------------------------------- 
/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 MTS 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
// (flattened-dump text sharing this line range, kept verbatim)
// 22 | -------------------------------------------------------------------------------- /src/controllers/render-controller/render.controller.ts: --------------------------------------------------------------------------------
import {
    BadRequestException,
    Controller,
    Get,
    Header,
    Request,
    Response,
} from "@nestjs/common";
import { Response as EResponse } from "express";
import { PrerenderService } from "../../services/prerender.service";

/** Route prefix that precedes the target URL in the request path. */
const RENDER_PREFIX = "/render/";

/** Status sent when the prerender result carries no usable status code. */
const DEFAULT_STATUS_CODE = 200;

/**
 * Extracts and decodes the target URL that follows the `/render/` prefix.
 *
 * @param requestUrl raw request URL, expected to start with `/render/`
 * @returns the decoded target URL
 * @throws BadRequestException when nothing follows the prefix, or when the
 *         remainder is not valid percent-encoded text
 */
function extractTargetUrl(requestUrl: string): string {
    const encoded = requestUrl.slice(RENDER_PREFIX.length);

    let url: string;
    try {
        url = decodeURIComponent(encoded);
    } catch {
        // decodeURIComponent throws URIError on malformed escapes such as "%E0%A4%A";
        // that is a client mistake, so answer 400 instead of letting it surface as 500.
        throw new BadRequestException(
            "Parameter 'url' is not valid URL-encoded text.",
        );
    }

    if (!url) {
        throw new BadRequestException("Parameter 'url' is required.");
    }

    return url;
}

/**
 * Converts the status code reported by the prerender result into a number
 * that is safe to pass to `response.status()`.
 *
 * Missing, non-numeric, or out-of-range (not 100–999) values fall back to 200,
 * because the HTTP layer throws on status codes outside that range.
 */
function toHttpStatus(raw: string | null | undefined): number {
    const parsed = Number.parseInt(raw ?? "", 10);
    const usable = Number.isInteger(parsed) && parsed >= 100 && parsed <= 999;

    return usable ? parsed : DEFAULT_STATUS_CODE;
}

/**
 * Serves prerendered HTML for the URL embedded in the request path.
 */
@Controller("render")
export class RenderController {
    public constructor(private readonly prerenderService: PrerenderService) {}

    /**
     * Renders the page whose URL follows the `/render/` prefix and sends the
     * resulting content with the status code reported by the prerender service.
     *
     * Example: http://localhost:3000/render/https://mts.ru/
     *
     * @param reguest incoming request; its `url` and `headers` are used
     * @param response Express response the rendered content is written to
     * @throws BadRequestException when the target URL is missing or malformed
     */
    @Get("*")
    @Header("Content-Type", "text/html")
    public async getRender(
        @Request() reguest: Request,
        @Response() response: EResponse,
    ): Promise<void> {
        const url = extractTargetUrl(reguest.url);

        const result = await this.prerenderService.render(url, reguest.headers);

        response
            .status(toHttpStatus(result.statusCode))
            .send(result.pageContent);
    }
}
// (flattened-dump text sharing this line range, kept verbatim; it continues on the next dump line)
// -------------------------------------------------------------------------------- /src/middleware/RequestLoggerMiddleware.ts: -------------------------------------------------------------------------------- 1 | import { Injectable, NestMiddleware } from "@nestjs/common"; 2 | 3 | import { Request, Response, NextFunction } from "express"; 4 | import { JsonLogger } from "../services/json-logger.service"; 5 | import { LogLevels } from "../LogLevels"; 6 | 7 | @Injectable() 8 | export class RequestLoggerMiddleware implements NestMiddleware { 9 | private logger = new JsonLogger(); 10 | 11 | use(request: Request, response: Response, next: NextFunction): void { 12 | const { ip, method, originalUrl, query } = request; 13 | const userAgent = request.get("user-agent") || "not set"; 14 | const startTime = performance.now(); 15 | const traceId = request.get("x-trace-id") || void 0; 16 | const realIp = request.get("x-real-ip") || void 0; 17 | 18 | response.on("close", () => { 19 | const { statusCode } = response; 20 | const contentLength = response.get("content-length"); 21 | 22 | let level = LogLevels.WARN; // for statuses 100, 300, 400, 600 23 | if (200 <= statusCode && statusCode < 300) { 24 | level = LogLevels.INFO; 25 | } else if (500 <= statusCode && statusCode < 600) { 26 | level = LogLevels.FATAL; 27 | } 28 | 29 | this.logger.extraLogs("Request", level, { 30 | method: method, 31 | url: originalUrl, 32 | query: query, 33 | statusCode: statusCode, 34 | contentLength: contentLength, 35 | userAgent: userAgent, 36 | userIp: realIp || ip, 37 | processTime: performance.now() - startTime, 38 | traceId: traceId, 39 | }); 40 | }); 41 | 42 | next(); 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /.github/workflows/docker-publish.yml: -------------------------------------------------------------------------------- 1 | name:
Docker 2 | 3 | # This workflow uses actions that are not certified by GitHub. 4 | # They are provided by a third-party and are governed by 5 | # separate terms of service, privacy policy, and support 6 | # documentation. 7 | 8 | on: 9 | push: 10 | branches: [ main ] 11 | workflow_dispatch: 12 | 13 | jobs: 14 | build: 15 | 16 | runs-on: ubuntu-latest 17 | permissions: 18 | contents: write 19 | packages: write 20 | 21 | steps: 22 | - name: Checkout repository 23 | uses: actions/checkout@v4 24 | 25 | - name: Log into registry docker.io 26 | uses: docker/login-action@v2 27 | with: 28 | registry: docker.io 29 | username: ${{ secrets.DOCKERHUB_USER }} 30 | password: ${{ secrets.DOCKERHUB_TOKEN }} 31 | 32 | - name: Build and publish package 33 | uses: actions/setup-node@v3 34 | with: 35 | node-version: 24 36 | registry-url: https://registry.npmjs.org/ 37 | 38 | - run: git config --global user.email "elabutin@mts.ru" 39 | - run: git config --global user.name "Eugene Labutin" 40 | - run: npm ci 41 | - run: npm run release 42 | - run: git push && git push --tags 43 | 44 | - name: Get version from package.json 45 | run: | 46 | VERSION=$(node -p "require('./package.json').version") 47 | echo "VERSION=$VERSION" >> $GITHUB_ENV 48 | echo "MAJOR=$(echo $VERSION | cut -d. -f1)" >> $GITHUB_ENV 49 | echo "MINOR=$(echo $VERSION | cut -d. -f2)" >> $GITHUB_ENV 50 | echo "PATCH=$(echo $VERSION | cut -d. 
-f3)" >> $GITHUB_ENV 51 | 52 | - name: Build and push Docker image 53 | uses: docker/build-push-action@v3 54 | with: 55 | push: true 56 | tags: | 57 | mtsrus/botview:latest 58 | mtsrus/botview:${{ env.MAJOR }} 59 | mtsrus/botview:${{ env.MAJOR }}.${{ env.MINOR }} 60 | mtsrus/botview:${{ env.MAJOR }}.${{ env.MINOR }}.${{ env.PATCH }} 61 | 62 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. 4 | 5 | ## [2.3.5] - 2025-11-19 6 | 7 | ### ⚙️ Miscellaneous Tasks 8 | 9 | - *(botview)* Bump version to 2.3.4 10 | 11 | ## [2.3.3] - 2025-11-19 12 | 13 | ## [2.3.2] - 2025-07-19 14 | 15 | ### 🐛 Bug Fixes 16 | 17 | - Add eslint.config.mjs to Dockerfile and remove unnecessary blank line in README.md 18 | 19 | ## [2.3.1] - 2025-07-19 20 | 21 | ### 🐛 Bug Fixes 22 | 23 | - Improve clarity and grammar in README.md 24 | 25 | ## [2.3.0] - 2025-07-19 26 | 27 | ### 🚀 Features 28 | 29 | - Implement logging levels and update logger imports 30 | 31 | ## [2.2.1] - 2025-07-19 32 | 33 | ## [2.2.0] - 2025-07-19 34 | 35 | ### 🚀 Features 36 | 37 | - Update Docker publish workflow and enhance README with troubleshooting tips for timeout issues 38 | 39 | ## [2.1.0] - 2025-07-19 40 | 41 | ### ⚙️ Miscellaneous Tasks 42 | 43 | - *(botview)* Release botview@2.1.0 44 | 45 | ## [2.0.0] - 2025-03-22 46 | 47 | ### 🚀 Features 48 | 49 | - Replace nestjs-prom by prom-client for monitoring, add json logger, make logs in json format, remove cache service 50 | - Add default timeouts 51 | - Add logger on browser console logs 52 | - Update docker image version 53 | - Add support more env variables, move prerender logic to service layer 54 | - Add long request detection 55 | - Update dependencies versions, use new puppeteer headless mode 56 | - Update dependencies versions 57 | - Update 
// (flattened-dump text sharing this line range, kept verbatim)
// dependencies versions 58 | - Replace puppeter engine by playwright engine, use webkit engine 59 | - Update nestjs to version 11 and other packages to latest versions 60 | 61 | ### 📚 Documentation 62 | 63 | - Update service name in readme 64 | - Update container parameters section in readme 65 | - Add to readme info about change status code 66 | 67 | ### ⚙️ Miscellaneous Tasks 68 | 69 | - Setup build by github actions 70 | - Update docker image version 71 | - Update package version 72 | - Test with networkidle2 73 | - Update version 74 | - Return default wait until to networkidle0 75 | - Publish reverted version 76 | - Update code to new linter version 77 | - Delete dead code 78 | 79 | ### Eat 80 | 81 | - Add goto timeout 82 | 83 | ## [1.0.0] - 2022-12-27 84 | 85 | ### ⚙️ Miscellaneous Tasks 86 | 87 | - Transfer to github 88 | 89 | 90 | 91 | -------------------------------------------------------------------------------- /src/services/json-logger.service.ts: --------------------------------------------------------------------------------
import { LoggerService } from "@nestjs/common";
import { hostname } from "os";
import { config } from "../config";
import { LogLevels } from "../LogLevels";

/**
 * Logger that writes every entry as a single JSON object via `console.log`.
 *
 * Entries whose numeric level is below the configured minimum
 * (`config.logLevel`) are dropped.
 */
export class JsonLogger implements LoggerService {
    protected hostname: string = hostname();
    private readonly minLogLevel: number;

    constructor() {
        // Take the minimum log level from the configuration.
        this.minLogLevel = config.logLevel;
    }

    /**
     * Turns the extra arguments of a `LoggerService` call into log properties.
     *
     * `LoggerService` methods may receive additional arguments after the
     * message (for example a context name or an error stack). Previously they
     * were discarded; now they are emitted under `params`. `Error` instances
     * are replaced by their stack (or message), because `JSON.stringify`
     * serializes an `Error` as `{}`.
     */
    private static paramsToProps(optionalParams: unknown[]): object {
        if (optionalParams.length === 0) {
            return {};
        }

        return {
            params: optionalParams.map((param) =>
                param instanceof Error ? (param.stack ?? param.message) : param,
            ),
        };
    }

    /**
     * Write a 'log' level log (emitted at INFO).
     */
    public log(message: string, ...optionalParams: unknown[]) {
        this.writeJson(
            message,
            LogLevels.INFO,
            JsonLogger.paramsToProps(optionalParams),
        );
    }

    /**
     * Write an 'error' level log.
     */
    public error(message: string, ...optionalParams: unknown[]) {
        this.writeJson(
            message,
            LogLevels.ERROR,
            JsonLogger.paramsToProps(optionalParams),
        );
    }

    /**
     * Write a 'fatal' level log.
     */
    public fatal(message: string, ...optionalParams: unknown[]) {
        this.writeJson(
            message,
            LogLevels.FATAL,
            JsonLogger.paramsToProps(optionalParams),
        );
    }

    /**
     * Write a 'warn' level log.
     */
    public warn(message: string, ...optionalParams: unknown[]) {
        this.writeJson(
            message,
            LogLevels.WARN,
            JsonLogger.paramsToProps(optionalParams),
        );
    }

    /**
     * Write a 'debug' level log.
     */
    public debug(message: string, ...optionalParams: unknown[]) {
        this.writeJson(
            message,
            LogLevels.DEBUG,
            JsonLogger.paramsToProps(optionalParams),
        );
    }

    /**
     * Write a 'verbose' level log (emitted at TRACE).
     */
    public verbose(message: string, ...optionalParams: unknown[]) {
        this.writeJson(
            message,
            LogLevels.TRACE,
            JsonLogger.paramsToProps(optionalParams),
        );
    }

    /**
     * Write a 'trace' level log.
     */
    public trace(message: string, ...optionalParams: unknown[]) {
        this.writeJson(
            message,
            LogLevels.TRACE,
            JsonLogger.paramsToProps(optionalParams),
        );
    }

    /**
     * Write a log entry at an explicit level with additional properties.
     *
     * @param message log message
     * @param level numeric log level (see `LogLevels`)
     * @param extraProps properties merged into the emitted JSON object
     */
    public extraLogs(
        message: string,
        level: number,
        extraProps: object = {},
    ): void {
        this.writeJson(message, level, extraProps);
    }

    /**
     * Serializes and prints one log entry.
     *
     * `extraProps` is spread after `message` and before the fixed fields, so
     * it can override `message` but not `time`, `level`, `hostname` or
     * `service`.
     */
    protected writeJson(
        message: string,
        level: number,
        extraProps: object = {},
    ): void {
        // Skip entries below the configured minimum level.
        if (level < this.minLogLevel) {
            return;
        }

        console.log(
            JSON.stringify({
                message: message,
                ...extraProps,
                time: Date.now(),
                level: level,
                hostname: this.hostname,
                service: "botview",
            }),
        );
    }
}
// (flattened-dump text sharing this line range, kept verbatim; it continues on the next dump line)
// -------------------------------------------------------------------------------- /src/controllers/metrics/metrics.controller.spec.ts: -------------------------------------------------------------------------------- 1 | import { Test, TestingModule } from "@nestjs/testing"; 2 | import { MetricsController } from "./metrics.controller"; 3 | import { Response } from "express"; 4 | 5 | describe("MetricsController", () => { 6 | let appController: MetricsController; 7 | 8 | beforeEach(async () => { 9 | const app: TestingModule = await Test.createTestingModule({ 10 | controllers: [MetricsController], 11 | providers: [], 12 | }).compile(); 13 | 14 | appController = app.get(MetricsController); 15 | }); 16 | 17 | describe("getMetrics", () => { 18 | it("should return metrics with correct content type and response", async ()
=> { 19 | const mockSet = jest.fn().mockReturnThis(); 20 | const mockSend = jest.fn().mockReturnThis(); 21 | const mockResponse = { 22 | set: mockSet, 23 | send: mockSend, 24 | } as unknown as Response; 25 | 26 | const result = await appController.getMetrics(mockResponse); 27 | 28 | // Check that response methods are called correctly 29 | expect(mockSet).toHaveBeenCalledTimes(1); 30 | expect(mockSet).toHaveBeenCalledWith( 31 | "Content-Type", 32 | expect.stringContaining("text/plain"), 33 | ); 34 | 35 | expect(mockSend).toHaveBeenCalledTimes(1); 36 | 37 | // Check that result contains Prometheus metrics 38 | expect(mockSend).toHaveBeenCalledWith( 39 | expect.stringContaining("# HELP"), 40 | ); 41 | expect(mockSend).toHaveBeenCalledWith( 42 | expect.stringContaining("# TYPE"), 43 | ); 44 | 45 | // Check that metrics contain CPU and RAM consumption values 46 | expect(mockSend).toHaveBeenCalledWith( 47 | expect.stringContaining("process_cpu_user_seconds_total"), 48 | ); 49 | expect(mockSend).toHaveBeenCalledWith( 50 | expect.stringContaining("process_cpu_system_seconds_total"), 51 | ); 52 | expect(mockSend).toHaveBeenCalledWith( 53 | expect.stringContaining("process_resident_memory_bytes"), 54 | ); 55 | expect(mockSend).toHaveBeenCalledWith( 56 | expect.stringContaining("nodejs_heap_size_total_bytes"), 57 | ); 58 | expect(mockSend).toHaveBeenCalledWith( 59 | expect.stringContaining("nodejs_heap_size_used_bytes"), 60 | ); 61 | 62 | // Check method result 63 | expect(result).toBe(mockResponse); 64 | }); 65 | }); 66 | }); 67 | -------------------------------------------------------------------------------- /test/app.e2e-spec.ts: -------------------------------------------------------------------------------- 1 | import { Test, TestingModule } from "@nestjs/testing"; 2 | import { INestApplication } from "@nestjs/common"; 3 | import * as request from "supertest"; 4 | import { AppModule } from "./../src/app.module"; 5 | 6 | describe("AppController (e2e)", () => { 7 | let 
app: INestApplication; 8 | 9 | beforeEach(async () => { 10 | const moduleFixture: TestingModule = await Test.createTestingModule({ 11 | imports: [AppModule], 12 | }).compile(); 13 | 14 | app = moduleFixture.createNestApplication(); 15 | await app.init(); 16 | }); 17 | 18 | afterEach(async () => { 19 | await app.close(); 20 | }); 21 | 22 | describe("Prerender", () => { 23 | it("should prerender https://mts.ru/ and return HTML content", async () => { 24 | // eslint-disable-next-line @typescript-eslint/no-unsafe-argument 25 | const response = await request(app.getHttpServer()) 26 | .get("/render/https://mts.ru/") 27 | .expect(200) 28 | .expect("Content-Type", "text/html; charset=utf-8"); 29 | 30 | // Check that response contains HTML content 31 | expect(response.text).toContain(""); 32 | expect(response.text).toContain(""); 34 | expect(response.text).toContain(""); 35 | expect(response.text).toContain(" { 45 | // eslint-disable-next-line @typescript-eslint/no-unsafe-argument 46 | await request(app.getHttpServer()).get("/render/").expect(400); 47 | }); 48 | 49 | it("should handle non-existent domain", async () => { 50 | // eslint-disable-next-line @typescript-eslint/no-unsafe-argument 51 | await request(app.getHttpServer()) 52 | .get("/render/https://non-existent-domain-12345.com/") 53 | .expect(500); 54 | }, 30000); 55 | 56 | it("should return custom status code when prerender-status meta is present", async () => { 57 | // This would need a test page that returns custom status 58 | // For now, just test that the endpoint works 59 | // eslint-disable-next-line @typescript-eslint/no-unsafe-argument 60 | const response = await request(app.getHttpServer()) 61 | .get("/render/https://httpbin.org/html") 62 | .expect(200); 63 | 64 | expect(response.text).toContain(""); 65 | }, 30000); 66 | }); 67 | }); 68 | -------------------------------------------------------------------------------- /package.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "name": "botview", 3 | "version": "2.3.5", 4 | "description": "Makes your SPA application visible to search robots.", 5 | "author": "MobileTeleSystems", 6 | "homepage": "https://github.com/MobileTeleSystems/botview", 7 | "bugs": "https://github.com/MobileTeleSystems/botview/issues", 8 | "private": true, 9 | "license": "MIT", 10 | "repository": { 11 | "type": "git", 12 | "url": "https://github.com/MobileTeleSystems/botview.git" 13 | }, 14 | "scripts": { 15 | "prebuild": "rimraf dist", 16 | "build": "nest build", 17 | "format": "prettier --write \"src/**/*.ts\" \"test/**/*.ts\"", 18 | "start": "nest start", 19 | "start:dev": "nest start --watch", 20 | "start:debug": "nest start --debug --watch", 21 | "start:prod": "node dist/main", 22 | "lint": "eslint \"{src,apps,libs,test}/**/*.ts\" --fix", 23 | "test": "jest", 24 | "test:watch": "jest --watch", 25 | "test:cov": "jest --coverage", 26 | "test:debug": "node --inspect-brk -r tsconfig-paths/register -r ts-node/register node_modules/.bin/jest --runInBand", 27 | "test:e2e": "jest --config ./test/jest-e2e.json", 28 | "release": "cliff-jumper --name 'botview' --package-path '.' 
--no-skip-changelog --no-skip-tag" 29 | }, 30 | "dependencies": { 31 | "@nestjs/common": "11.1.9", 32 | "@nestjs/core": "11.1.9", 33 | "@nestjs/platform-express": "11.1.9", 34 | "@playwright/browser-chromium": "^1.56.1", 35 | "playwright": "^1.56.1", 36 | "prom-client": "15.1.3", 37 | "reflect-metadata": "0.2.2", 38 | "rimraf": "6.1.0", 39 | "rxjs": "7.8.2" 40 | }, 41 | "devDependencies": { 42 | "@commitlint/cli": "^20.1.0", 43 | "@commitlint/config-conventional": "^20.0.0", 44 | "@favware/cliff-jumper": "^6.0.0", 45 | "@eslint/eslintrc": "^3.3.1", 46 | "@eslint/js": "^9.39.1", 47 | "@nestjs/cli": "^11.0.11", 48 | "@nestjs/schematics": "^11.0.9", 49 | "@nestjs/testing": "^11.1.9", 50 | "@swc/cli": "^0.7.9", 51 | "@swc/core": "^1.15.2", 52 | "fastify": "^5.6.2", 53 | "@types/express": "^5.0.5", 54 | "@types/jest": "^30.0.0", 55 | "@types/node": "^24.10.1", 56 | "@types/node-fetch": "^3.0.2", 57 | "@types/sharp": "^0.32.0", 58 | "@types/supertest": "^6.0.3", 59 | "eslint": "^9.39.1", 60 | "eslint-config-prettier": "^10.1.8", 61 | "eslint-plugin-prettier": "^5.5.4", 62 | "globals": "^16.5.0", 63 | "husky": "^9.1.7", 64 | "jest": "^30.2.0", 65 | "lint-staged": "^16.2.6", 66 | "markdownlint": "^0.39.0", 67 | "markdownlint-cli": "^0.46.0", 68 | "prettier": "^3.6.2", 69 | "source-map-support": "^0.5.21", 70 | "supertest": "^7.1.4", 71 | "ts-jest": "^29.4.5", 72 | "ts-loader": "^9.5.4", 73 | "ts-node": "^10.9.2", 74 | "tsconfig-paths": "^4.2.0", 75 | "typescript": "^5.9.3", 76 | "typescript-eslint": "^8.47.0" 77 | }, 78 | "jest": { 79 | "moduleFileExtensions": [ 80 | "js", 81 | "json", 82 | "ts" 83 | ], 84 | "rootDir": "src", 85 | "testRegex": ".*\\.spec\\.ts$", 86 | "transform": { 87 | "^.+\\.(t|j)s$": "ts-jest" 88 | }, 89 | "collectCoverageFrom": [ 90 | "**/*.(t|j)s" 91 | ], 92 | "coverageDirectory": "../coverage", 93 | "testEnvironment": "node" 94 | } 95 | } 96 | -------------------------------------------------------------------------------- /src/config.ts: 
// (flattened-dump text sharing this line range, kept verbatim)
// --------------------------------------------------------------------------------
import { LogLevels } from "./LogLevels";

// ---------------------------------------------------------------------------
// Environment defaults. Each variable keeps a user-supplied value and only
// falls back to the default when it is unset or empty.
// ---------------------------------------------------------------------------

// Array of basic auths in format "url:login:password" (URL-encoding is optional),
// use comma or space as separator. BASIC_AUTHS is the legacy variable name.
// Only copy the legacy value when it is actually set: assigning `undefined` to a
// process.env property stores the literal string "undefined", which would then be
// parsed as a bogus auth entry.
if (!process.env.BOTVIEW_BASIC_AUTHS && process.env.BASIC_AUTHS) {
    process.env.BOTVIEW_BASIC_AUTHS = process.env.BASIC_AUTHS;
}
process.env.BOTVIEW_NAV_TIMEOUT ||= "30000"; // ms
process.env.BOTVIEW_DEFAULT_TIMEOUT ||= "15000"; // ms
process.env.BOTVIEW_WAIT_UNTIL ||= "networkidle"; // load | domcontentloaded | networkidle | commit
process.env.BOTVIEW_BLOCK_IMAGES ||= "true"; // true to block images
process.env.BOTVIEW_BLOCK_STYLESHEETS ||= "true"; // true to block stylesheets
process.env.BOTVIEW_BLOCK_FONTS ||= "true"; // true to block fonts
process.env.BOTVIEW_BLOCK_MEDIA ||= "true"; // true to block media
process.env.BOTVIEW_BLOCK_URLS ||= "https://an.yandex.ru, https://mc.yandex.ru"; // comma or space separated list of urls to block (URL-encoding is optional)
process.env.BOTVIEW_BLOCK_URLS_REGEX ||= ""; // comma or space separated list of regexes to block (URL-encoding is optional)
process.env.BOTVIEW_VIEWPORT ||= "360x640"; // screen viewport, format: WIDTHxHEIGHT (e.g. 360x640)
process.env.BOTVIEW_LOG_LEVEL ||= LogLevels.INFO.toString(); // log level: trace, debug, info, warn, error, fatal

/**
 * Resolves a log level given either as a number present in `LogLevels`
 * (e.g. "30") or as a level name in any letter case (e.g. "info").
 *
 * @throws Error when the value matches neither form
 */
function getLogLevelChecked(logLevel: string): number {
    // A number that exists in the enum is used as is.
    const numLevel = Number(logLevel);
    if (!isNaN(numLevel) && Object.values(LogLevels).includes(numLevel)) {
        return numLevel;
    }
    // Otherwise accept the name of an enum member (for example "INFO").
    if (typeof logLevel === "string" && logLevel.toUpperCase() in LogLevels) {
        return LogLevels[
            logLevel.toUpperCase() as keyof typeof LogLevels
        ] as number;
    }
    throw new Error(`Unknown BOTVIEW_LOG_LEVEL: ${logLevel}`);
}

/**
 * Parses a "WIDTHxHEIGHT" string. Anything that does not match that format
 * falls back to 360x640.
 */
function parseViewport(str: string): { width: number; height: number } {
    const match = /^([0-9]+)x([0-9]+)$/i.exec(str);
    if (!match) return { width: 360, height: 640 };
    return { width: Number(match[1]), height: Number(match[2]) };
}

/**
 * Splits a comma- or whitespace-separated list and drops empty items
 * (produced by leading/trailing separators or an empty value).
 */
function splitList(raw: string | undefined): string[] {
    return (raw ?? "").split(/[\s,]+/).filter(Boolean);
}

/**
 * Parses one "url:login:password" entry into `[url, login, password]`.
 *
 * The entry is split on ":" before URL-decoding, and the last two segments
 * are taken as login and password. This keeps encoded colons (`%3A`) inside a
 * part and lets the url itself contain colons (scheme, port).
 *
 * Entries with fewer than three raw segments are decoded first and then split
 * positionally, exactly as before, so a fully URL-encoded entry
 * ("url%3Alogin%3Apassword") still works; parts that are absent are
 * `undefined` at runtime.
 */
function parseBasicAuth(entry: string): [string, string, string] {
    const segments = entry.split(":");

    if (segments.length < 3) {
        const decoded = decodeURIComponent(entry).split(":");
        return [decoded[0], decoded[1], decoded[2]];
    }

    const url = segments.slice(0, -2).join(":");
    const login = segments[segments.length - 2];
    const password = segments[segments.length - 1];

    return [
        decodeURIComponent(url),
        decodeURIComponent(login),
        decodeURIComponent(password),
    ];
}

/**
 * Application configuration resolved once, at module load, from the
 * environment variables defaulted above.
 */
export const config = {
    basicAuth: process.env.BOTVIEW_BASIC_AUTHS,
    basicAuthParsed: splitList(process.env.BOTVIEW_BASIC_AUTHS).map(
        parseBasicAuth,
    ),
    navTimeout: Number(process.env.BOTVIEW_NAV_TIMEOUT),
    defaultTimeout: Number(process.env.BOTVIEW_DEFAULT_TIMEOUT),
    waitUntil: process.env.BOTVIEW_WAIT_UNTIL,
    blockImages: process.env.BOTVIEW_BLOCK_IMAGES === "true",
    blockStylesheets: process.env.BOTVIEW_BLOCK_STYLESHEETS === "true",
    blockFonts: process.env.BOTVIEW_BLOCK_FONTS === "true",
    blockMedia: process.env.BOTVIEW_BLOCK_MEDIA === "true",
    blockUrls: splitList(process.env.BOTVIEW_BLOCK_URLS).map((url) =>
        decodeURIComponent(url),
    ),
    // Empty items are removed before compiling: `new RegExp("")` matches every
    // string and would block all requests.
    blockUrlsRegex: splitList(process.env.BOTVIEW_BLOCK_URLS_REGEX).map(
        (regex) => new RegExp(decodeURIComponent(regex)),
    ),
    logLevel: getLogLevelChecked(process.env.BOTVIEW_LOG_LEVEL),
    viewport: parseViewport(process.env.BOTVIEW_VIEWPORT),
};
// (flattened-dump text sharing this line range, kept verbatim; it continues on the next dump line)
// -------------------------------------------------------------------------------- /cliff.toml: -------------------------------------------------------------------------------- 1 | # git-cliff ~ default configuration file 2 | # https://git-cliff.org/docs/configuration 3 | # 4 | # Lines starting with "#" are comments. 5 | # Configuration options are organized into tables and keys. 6 | # See documentation for more information on available options. 7 | 8 | [changelog] 9 | # changelog header 10 | header = """ 11 | # Changelog\n 12 | All notable changes to this project will be documented in this file.\n 13 | """ 14 | # template for the changelog body 15 | # https://keats.github.io/tera/docs/#introduction 16 | body = """ 17 | {% if version %}\ 18 | ## [{{ version | trim_start_matches(pat="v") }}] - {{ timestamp | date(format="%Y-%m-%d") }} 19 | {% else %}\ 20 | ## [unreleased] 21 | {% endif %}\ 22 | {% for group, commits in commits | group_by(attribute="group") %} 23 | ### {{ group | striptags | trim | upper_first }} 24 | {% for commit in commits %} 25 | - {% if commit.scope %}*({{ commit.scope }})* {% endif %}\ 26 | {% if commit.breaking %}[**breaking**] {% endif %}\ 27 | {{ commit.message | upper_first }}\ 28 | {% endfor %} 29 | {% endfor %}\n 30 | """ 31 | # template for the changelog footer 32 | footer = """ 33 | 34 | """ 35 | # remove the leading and trailing s 36 | trim = true 37 | # postprocessors 38 | postprocessors = [ 39 | # { pattern = '', replace =
"https://github.com/orhun/git-cliff" }, # replace repository URL 40 | ] 41 | 42 | [git] 43 | # parse the commits based on https://www.conventionalcommits.org 44 | conventional_commits = true 45 | # filter out the commits that are not conventional 46 | filter_unconventional = true 47 | # process each line of a commit as an individual commit 48 | split_commits = false 49 | # regex for preprocessing the commit messages 50 | commit_preprocessors = [ 51 | # Replace issue numbers 52 | #{ pattern = '\((\w+\s)?#([0-9]+)\)', replace = "([#${2}](/issues/${2}))"}, 53 | # Check spelling of the commit with https://github.com/crate-ci/typos 54 | # If the spelling is incorrect, it will be automatically fixed. 55 | #{ pattern = '.*', replace_command = 'typos --write-changes -' }, 56 | ] 57 | # regex for parsing and grouping commits 58 | commit_parsers = [ 59 | { message = "^feat", group = "🚀 Features" }, 60 | { message = "^fix", group = "🐛 Bug Fixes" }, 61 | { message = "^doc", group = "📚 Documentation" }, 62 | { message = "^perf", group = "⚡ Performance" }, 63 | { message = "^refactor", group = "🚜 Refactor" }, 64 | { message = "^style", group = "🎨 Styling" }, 65 | { message = "^test", group = "🧪 Testing" }, 66 | { message = "^chore\\(release\\): prepare for", skip = true }, 67 | { message = "^chore\\(deps.*\\)", skip = true }, 68 | { message = "^chore\\(pr\\)", skip = true }, 69 | { message = "^chore\\(pull\\)", skip = true }, 70 | { message = "^chore|^ci", group = "⚙️ Miscellaneous Tasks" }, 71 | { body = ".*security", group = "🛡️ Security" }, 72 | { message = "^revert", group = "◀️ Revert" }, 73 | ] 74 | # protect breaking changes from being skipped due to matching a skipping commit_parser 75 | protect_breaking_commits = false 76 | # filter out the commits that are not matched by commit parsers 77 | filter_commits = false 78 | # regex for matching git tags 79 | # tag_pattern = "v[0-9].*" 80 | # regex for skipping tags 81 | # skip_tags = "" 82 | # regex for ignoring tags 83 | # 
ignore_tags = "" 84 | # sort the tags topologically 85 | topo_order = false 86 | # sort the commits inside sections by oldest/newest order 87 | sort_commits = "oldest" 88 | # limit the number of commits included in the changelog. 89 | # limit_commits = 42 90 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our 6 | community a harassment-free experience for everyone, regardless of age, body 7 | size, visible or invisible disability, ethnicity, sex characteristics, gender 8 | identity and expression, level of experience, education, socio-economic status, 9 | nationality, personal appearance, race, caste, color, religion, or sexual 10 | identity and orientation. 11 | 12 | We pledge to act and interact in ways that contribute to an open, welcoming, 13 | diverse, inclusive, and healthy community. 
14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our mistakes, and learning from the experience 24 | * Focusing on what is best not just for us as individuals, but for the overall community 25 | 26 | Examples of unacceptable behavior include: 27 | 28 | * The use of sexualized language or imagery, and sexual attention or advances of any kind 29 | * Trolling, insulting or derogatory comments, and personal or political attacks 30 | * Public or private harassment 31 | * Publishing others' private information, such as a physical or email address, without their explicit permission 32 | * Other conduct which could reasonably be considered inappropriate in a professional setting 33 | 34 | ## Enforcement Responsibilities 35 | 36 | Community leaders are responsible for clarifying and enforcing our standards of 37 | acceptable behavior and will take appropriate and fair corrective action in 38 | response to any behavior that they deem inappropriate, threatening, offensive, 39 | or harmful. 40 | 41 | Community leaders have the right and responsibility to remove, edit, or reject 42 | comments, commits, code, wiki edits, issues, and other contributions that are 43 | not aligned to this Code of Conduct, and will communicate reasons for moderation 44 | decisions when appropriate. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies within all community spaces, and also applies when 49 | an individual is officially representing the community in public spaces. 
50 | Examples of representing our community include using an official e-mail address, 51 | posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. 53 | 54 | ## Enforcement 55 | 56 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 57 | reported to the community leaders responsible for enforcement at 58 | mts.github@mts.ru. 59 | All complaints will be reviewed and investigated promptly and fairly. 60 | 61 | All community leaders are obligated to respect the privacy and security of the 62 | reporter of any incident. 63 | 64 | ## Enforcement Guidelines 65 | 66 | Community leaders will follow these Community Impact Guidelines in determining 67 | the consequences for any action they deem in violation of this Code of Conduct: 68 | 69 | ### 1. Correction 70 | 71 | **Community Impact**: Use of inappropriate language or other behavior deemed 72 | unprofessional or unwelcome in the community. 73 | 74 | **Consequence**: A private, written warning from community leaders, providing 75 | clarity around the nature of the violation and an explanation of why the 76 | behavior was inappropriate. A public apology may be requested. 77 | 78 | ### 2. Warning 79 | 80 | **Community Impact**: A violation through a single incident or series of 81 | actions. 82 | 83 | **Consequence**: A warning with consequences for continued behavior. No 84 | interaction with the people involved, including unsolicited interaction with 85 | those enforcing the Code of Conduct, for a specified period of time. This 86 | includes avoiding interactions in community spaces as well as external channels 87 | like social media. Violating these terms may lead to a temporary or permanent 88 | ban. 89 | 90 | ### 3. Temporary Ban 91 | 92 | **Community Impact**: A serious violation of community standards, including 93 | sustained inappropriate behavior. 
94 | 95 | **Consequence**: A temporary ban from any sort of interaction or public 96 | communication with the community for a specified period of time. No public or 97 | private interaction with the people involved, including unsolicited interaction 98 | with those enforcing the Code of Conduct, is allowed during this period. 99 | Violating these terms may lead to a permanent ban. 100 | 101 | ### 4. Permanent Ban 102 | 103 | **Community Impact**: Demonstrating a pattern of violation of community 104 | standards, including sustained inappropriate behavior, harassment of an 105 | individual, or aggression toward or disparagement of classes of individuals. 106 | 107 | **Consequence**: A permanent ban from any sort of public interaction within the 108 | community. 109 | 110 | ## Attribution 111 | 112 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 113 | version 2.1, available at 114 | [https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1]. 115 | 116 | Community Impact Guidelines were inspired by 117 | [Mozilla's code of conduct enforcement ladder][Mozilla CoC]. 118 | 119 | For answers to common questions about this code of conduct, see the FAQ at 120 | [https://www.contributor-covenant.org/faq][FAQ]. Translations are available at 121 | [https://www.contributor-covenant.org/translations][translations]. 
122 | 123 | [homepage]: https://www.contributor-covenant.org 124 | [v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html 125 | [Mozilla CoC]: https://github.com/mozilla/diversity 126 | [FAQ]: https://www.contributor-covenant.org/faq 127 | [translations]: https://www.contributor-covenant.org/translations 128 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Microservice for prerendering web pages 2 | 3 | Web pages rendered on the client side without SSR are not visible to bots, because bots can't render pages. This microservice allows all bots to see the already rendered page. 4 | 5 | Features: 6 | 7 | - Allows bots to see content on web pages without SSR 8 | - Works with any framework, e.g. JQuery, Angular 1, StencilJS, and others 9 | - Works with any technology, e.g. WebComponents, Microfrontends, Dynamic Content, and others 10 | - Fast and low memory usage, uses only a web engine without a heavy browser 11 | - Prerendering can work not only for bots but also for all clients if the application supports rerendering 12 | - Includes a Prometheus metrics exporter 13 | 14 | ## Try 15 | 16 | To try the microservice features, run the container with the command: 17 | 18 | ```sh 19 | docker run -it --rm -p 3000:3000 mtsrus/botview 20 | ``` 21 | 22 | Now you can open the browser and check the result with the command: 23 | 24 | ```sh 25 | http://localhost:3000/render/https://mts.ru/ 26 | ``` 27 | 28 | The fully rendered page should be displayed, including all content. 
29 | 30 | ## Use 31 | 32 | To start the microservice in production, use the command: 33 | 34 | ```sh 35 | docker run -d --restart always -p 3000:3000 mtsrus/botview 36 | ``` 37 | 38 | ## Return status code 39 | 40 | Add the following element to your HTML and specify the required response status code: 41 | 42 | ```html 43 | <meta name="prerender-status" content="404" /> 44 | ``` 45 | 46 | The server will return a response with the specified status code. 47 | 48 | ## Container parameters 49 | 50 | - `-e BOTVIEW_BASIC_AUTHS=""` - An array of endpoints with basic authorization parameters, default is empty. 51 | Format: "url:login:password" (URL-encoding is optional for simple URLs). Use comma or space as a separator. 52 | Example: `"https://example.com:user:pass,https://test.com:admin:secret"` 53 | 54 | - `-e BOTVIEW_NAV_TIMEOUT=30000` - [This setting will change the default maximum navigation time](https://playwright.dev/docs/api/class-page#page-set-default-navigation-timeout), 55 | default 30000. 56 | 57 | - `-e BOTVIEW_DEFAULT_TIMEOUT=15000` - [This setting will change the default timeout](https://playwright.dev/docs/api/class-page#page-set-default-timeout), 58 | default 15000. 59 | 60 | - `-e BOTVIEW_WAIT_UNTIL=networkidle` - [When to consider waiting succeeds. Given an array of event strings, waiting is considered to be successful after all events have been fired](https://playwright.dev/docs/api/class-page#page-goto), 61 | default networkidle. 62 | 63 | - `-e BOTVIEW_VIEWPORT="360x640"` - Set the screen resolution (viewport) for the browser context, format is WIDTHxHEIGHT (e.g. `1280x720`). Default is `360x640`. 64 | Example: `-e BOTVIEW_VIEWPORT="1280x720"` 65 | 66 | - `-e BOTVIEW_BLOCK_IMAGES=true` - Block loading of images to improve performance, default true. 67 | 68 | - `-e BOTVIEW_BLOCK_STYLESHEETS=true` - Block loading of stylesheets to improve performance, default true. 69 | 70 | - `-e BOTVIEW_BLOCK_FONTS=true` - Block loading of fonts to improve performance, default true. 
71 | 72 | - `-e BOTVIEW_BLOCK_MEDIA=true` - Block loading of media files to improve performance, default true. 73 | 74 | - `-e BOTVIEW_BLOCK_URLS="https://an.yandex.ru, https://mc.yandex.ru"` - Comma or space separated list of URL prefixes to block (uses startsWith matching), by default blocks Yandex analytics. 75 | Example: `"https://google-analytics.com,https://facebook.com/tr"` will block all requests starting with these URLs. 76 | Set to an empty string `""` to disable default blocking. 77 | 78 | - `-e BOTVIEW_BLOCK_URLS_REGEX=""` - Comma or space separated list of regular expressions to block URLs, default is empty. 79 | Example: `".*\.ads\..*,.*tracking.*"` will block URLs containing ".ads." or "tracking" anywhere in the URL. 80 | 81 | - `-e BOTVIEW_LOG_LEVEL=info` - Log level for the application, default "info". 82 | Available levels: `trace`, `debug`, `info`, `warn`, `error`, `fatal`. 83 | 84 | ## Troubleshooting Timeout Issues 85 | 86 | If you encounter timeout errors during prerendering, it is usually caused by "leaked requests" - network requests that do not complete properly and prevent the page from finishing loading. 87 | 88 | To fix this: 89 | 90 | 1. **Check the logs** for entries containing `"Leaked requests"` - these will show which URLs are causing the timeout 91 | 2. **Add problematic URLs to BOTVIEW_BLOCK_URLS** to prevent them from being loaded during prerendering. 92 | 3. **Use the container parameter** `-e BOTVIEW_BLOCK_URLS="url1,url2,url3"` to block specific URLs. 93 | 94 | **Example:** 95 | 96 | ```sh 97 | docker run -d -p 3000:3000 -e BOTVIEW_BLOCK_URLS="https://problematic-analytics.com,https://slow-tracker.net" mtsrus/botview 98 | ``` 99 | 100 | **Default blocked URLs:** 101 | By default, `https://an.yandex.ru` and `https://mc.yandex.ru` are blocked due to a Chromium bug that prevents proper handling of these analytics requests, which can cause timeouts. 
102 | 103 | ## Metrics Prometheus 104 | 105 | The microservice has built-in Prometheus monitoring and is available at the endpoint `/metrics`. 106 | 107 | Block this endpoint on the proxy if you do not need to provide access to metrics from outside your network. 108 | 109 | ## Proxy server setup 110 | 111 | To catch bots and send them to the prerendering microservice, you need to configure a proxy server. 112 | 113 | Example config for Nginx: 114 | 115 | ```sh 116 | location / { 117 | 118 | set $prerender 0; 119 | 120 | # all popular bots 121 | if ($http_user_agent ~* "googlebot|facebookexternalhit|twitterbot|telegrambot|yahoo|bingbot|baiduspider|yandex|yeti|yodaobot|gigabot|ia_archiver|developers\.google\.com") { 122 | set $prerender 1; 123 | } 124 | 125 | # bot with escape fragments 126 | if ($args ~ "_escaped_fragment_") { 127 | set $prerender 1; 128 | } 129 | 130 | # prerender microservice 131 | if ($http_user_agent ~ "Prerender") { 132 | set $prerender 0; 133 | } 134 | 135 | # static files 136 | if ($uri ~ \.[a-zA-Z0-9]+$) { 137 | set $prerender 0; 138 | } 139 | 140 | if ($prerender = 1) { 141 | rewrite (.*) /render/$scheme://$host$1?prerender break; 142 | proxy_pass http://localhost:3000; 143 | } 144 | 145 | if ($prerender = 0) { 146 | proxy_pass http://localhost:80; 147 | } 148 | } 149 | ``` 150 | -------------------------------------------------------------------------------- /src/controllers/render-controller/render.controller.spec.ts: -------------------------------------------------------------------------------- 1 | /* eslint-disable @typescript-eslint/no-unsafe-argument */ 2 | import { Test, TestingModule } from "@nestjs/testing"; 3 | import { BadRequestException } from "@nestjs/common"; 4 | import { Response } from "express"; 5 | import { RenderController } from "./render.controller"; 6 | import { PrerenderService } from "../../services/prerender.service"; 7 | 8 | interface MockRequest { 9 | url: string; 10 | headers: Record; 11 | } 12 | 13 | 
// Unit tests for RenderController.getRender. PrerenderService is replaced by a
// jest mock, so no browser is started.
describe("RenderController", () => {
    let renderController: RenderController;
    let mockResponse: Partial<Response>;
    let mockRender: jest.Mock;

    /** Builds the minimal request shape used by these tests: raw URL plus headers. */
    const makeRequest = (url: string): MockRequest => ({
        url,
        headers: { "user-agent": "test-agent" },
    });

    /** Calls the controller with the mocked response object. */
    const callRender = (request: MockRequest) =>
        renderController.getRender(request as any, mockResponse as Response);

    beforeEach(async () => {
        mockRender = jest.fn();

        mockResponse = {
            status: jest.fn().mockReturnThis(),
            send: jest.fn().mockReturnThis(),
        };

        const app: TestingModule = await Test.createTestingModule({
            controllers: [RenderController],
            providers: [
                {
                    provide: PrerenderService,
                    useValue: { render: mockRender },
                },
            ],
        }).compile();

        renderController = app.get<RenderController>(RenderController);
    });

    describe("getRender", () => {
        it("should successfully render a valid URL", async () => {
            const mockRequest = makeRequest("/render/https://example.com");
            const mockResult = {
                statusCode: "200",
                pageContent: "Test Content",
            };
            mockRender.mockResolvedValue(mockResult);

            await callRender(mockRequest);

            expect(mockRender).toHaveBeenCalledWith(
                "https://example.com",
                mockRequest.headers,
            );
            expect(mockResponse.status).toHaveBeenCalledWith(200);
            expect(mockResponse.send).toHaveBeenCalledWith(
                mockResult.pageContent,
            );
        });

        it("should throw BadRequestException when URL is empty", async () => {
            const mockRequest = makeRequest("/render/");

            await expect(callRender(mockRequest)).rejects.toThrow(
                BadRequestException,
            );
            await expect(callRender(mockRequest)).rejects.toThrow(
                "Parameter 'url' is required.",
            );
        });

        it("should throw BadRequestException when URL is missing", async () => {
            const mockRequest = makeRequest("/render");

            await expect(callRender(mockRequest)).rejects.toThrow(
                BadRequestException,
            );
        });

        it("should handle URL decoding properly", async () => {
            const encodedUrl = encodeURIComponent(
                "https://example.com/path with spaces",
            );
            const mockRequest = makeRequest(`/render/${encodedUrl}`);
            mockRender.mockResolvedValue({
                statusCode: "200",
                pageContent: "Test Content",
            });

            await callRender(mockRequest);

            expect(mockRender).toHaveBeenCalledWith(
                "https://example.com/path with spaces",
                mockRequest.headers,
            );
        });

        it("should handle invalid URL formats", async () => {
            const mockRequest = makeRequest("/render/not-a-valid-url");
            mockRender.mockRejectedValue(new Error("Invalid URL"));

            await expect(callRender(mockRequest)).rejects.toThrow(
                "Invalid URL",
            );
        });

        it("should handle prerender service errors", async () => {
            const mockRequest = makeRequest("/render/https://example.com");
            mockRender.mockRejectedValue(new Error("Navigation timeout"));

            await expect(callRender(mockRequest)).rejects.toThrow(
                "Navigation timeout",
            );
        });

        it("should use default status code 200 when statusCode is null", async () => {
            const mockRequest = makeRequest("/render/https://example.com");
            mockRender.mockResolvedValue({
                statusCode: null,
                pageContent: "Test Content",
            });

            await callRender(mockRequest);

            expect(mockResponse.status).toHaveBeenCalledWith(200);
        });

        it("should use default status code 200 when statusCode is invalid", async () => {
            const mockRequest = makeRequest("/render/https://example.com");
            mockRender.mockResolvedValue({
                statusCode: "invalid",
                pageContent: "Test Content",
            });

            await callRender(mockRequest);

            expect(mockResponse.status).toHaveBeenCalledWith(200);
        });

        it("should handle custom status codes", async () => {
            const mockRequest = makeRequest("/render/https://example.com");
            const mockResult = {
                statusCode: "404",
                pageContent: "Not Found",
            };
            mockRender.mockResolvedValue(mockResult);

            await callRender(mockRequest);

            expect(mockResponse.status).toHaveBeenCalledWith(404);
            expect(mockResponse.send).toHaveBeenCalledWith(
                mockResult.pageContent,
            );
        });
    });
});
// -------------------------------------------------------------------------------- /src/services/prerender.service.ts:
// --------------------------------------------------------------------------------
import { JsonLogger } from "./json-logger.service";
import { LogLevels } from "../LogLevels";
import { config } from "../config";
import { Injectable } from "@nestjs/common";
import {
    chromium,
    Browser,
    Page,
    ConsoleMessage,
    Request,
    BrowserContextOptions,
} from "playwright";
import { LeakedRequests } from "../models/LeakedRequests";

/**
 * Request headers as handed to `render`: either a Fetch `Headers` instance or
 * a plain header record (the previous implementation indexed the value like a
 * record while typing it as `Headers`).
 */
type RequestHeaders = Headers | Record<string, string | string[] | undefined>;

/** Browser console message type -> log level; unknown types fall back to TRACE. */
const CONSOLE_LOG_LEVELS: ReadonlyMap<string, LogLevels> = new Map([
    ["log", LogLevels.INFO],
    ["debug", LogLevels.DEBUG],
    ["info", LogLevels.INFO],
    ["error", LogLevels.ERROR],
    ["warning", LogLevels.WARN],
    ["trace", LogLevels.TRACE],
]);

/** Narrows to a Fetch `Headers` instance by checking for its `get` method. */
function isFetchHeaders(headers: RequestHeaders): headers is Headers {
    return typeof (headers as { get?: unknown }).get === "function";
}

/**
 * Renders a page in a Chromium instance and returns its serialized DOM
 * together with the status code requested by the page itself.
 */
@Injectable()
export class PrerenderService {
    public constructor(private readonly logger: JsonLogger) {}

    /**
     * Loads `url` in a fresh browser and returns the rendered HTML.
     *
     * @param url - absolute URL of the page to prerender
     * @param headers - headers of the incoming request; only "user-agent" is
     * read and exposed to the page as `window.prerender.userAgent`
     * @returns `pageContent` (serialized DOM) and `statusCode`, the `content`
     * of `<meta name="prerender-status">` when the page defines one
     * @throws whatever Playwright throws; on a timeout the requests still in
     * flight are logged first
     */
    public async render(url: string, headers: RequestHeaders) {
        const browser: Browser = await chromium.launch({
            // headless: false, // for debug
            // devtools: true,
            // args: [
            //     '--no-sandbox', '--disable-setuid-sandbox'
            // ],
            timeout: config.navTimeout,
        });

        let requests: LeakedRequests[] = [];

        // Everything after launch() runs inside the try so that the browser is
        // closed even when creating the context or the page fails.
        try {
            const context = await browser.newContext({
                viewport: config.viewport,
                ...this.setAuth(url),
            });
            const page: Page = await context.newPage();

            await page.addInitScript(
                (data) => {
                    Reflect.set(window, "prerender", data);
                },
                {
                    userAgent: this.getUserAgent(headers),
                },
            );

            page.setDefaultNavigationTimeout(config.navTimeout);
            page.setDefaultTimeout(config.defaultTimeout);
            this.setLogOnConsole(page);

            const blockingConfigured =
                config.blockImages ||
                config.blockStylesheets ||
                config.blockFonts ||
                config.blockMedia ||
                config.blockUrls.length > 0 ||
                config.blockUrlsRegex.length > 0;
            if (blockingConfigured) {
                // Awaited so a failure to install the route surfaces here
                // instead of being dropped.
                await this.setResourceBlocking(page);
            }
            requests = this.setRequestLeakDetector(page);

            await page.goto(url, {
                waitUntil: config.waitUntil as "networkidle",
                timeout: config.navTimeout,
            });
            const pageContent = await page.content(); // serialized HTML of page DOM.

            const statusCode = await page.evaluate(() => {
                return document.head
                    ?.querySelector('meta[name="prerender-status"]')
                    ?.getAttribute("content");
            });

            return {
                statusCode,
                pageContent,
            };
        } catch (error) {
            this.checkAndLogLeakedRequests(requests, error);
            throw error;
        } finally {
            // Closing a launched browser also closes its contexts and pages, so
            // a failing context.close() can no longer keep the browser alive.
            await browser.close();
        }
    }

    /** Reads the "user-agent" header from either supported header shape. */
    private getUserAgent(headers: RequestHeaders): string | undefined {
        if (isFetchHeaders(headers)) {
            return headers.get("user-agent") ?? undefined;
        }
        const value = headers["user-agent"];
        return Array.isArray(value) ? value.join(", ") : value;
    }

    /**
     * Picks HTTP basic-auth credentials for `url`.
     *
     * @returns `httpCredentials` of the first configured entry whose URL is a
     * prefix of `url`, or an empty object when none matches
     */
    private setAuth(
        url: string,
    ): Pick<BrowserContextOptions, "httpCredentials"> {
        const entry = config.basicAuthParsed.find(
            // An empty prefix would match every URL and leak the credentials.
            ([authUrl]) => Boolean(authUrl) && url.startsWith(authUrl),
        );
        if (!entry) {
            // No basic auth
            return {};
        }

        const [, username, password] = entry;
        return {
            httpCredentials: {
                username,
                password,
            },
        };
    }

    /** Forwards browser console messages to the service logger. */
    private setLogOnConsole(page: Page): void {
        page.on("console", (msg: ConsoleMessage) => {
            const level =
                CONSOLE_LOG_LEVELS.get(msg.type()) ?? LogLevels.TRACE;

            this.logger.extraLogs(`Browser log: ${msg.text()}`, level, {
                location: msg.location(),
                args: msg.args(),
            });
        });
    }

    /**
     * Installs a route that aborts requests matching the configured URL
     * prefixes, URL regexes or resource types; all other requests continue.
     */
    private async setResourceBlocking(page: Page): Promise<void> {
        // Resolved once: the configuration does not change between requests.
        const blockedTypes = new Set<string>();
        if (config.blockImages) blockedTypes.add("image");
        if (config.blockStylesheets) blockedTypes.add("stylesheet");
        if (config.blockFonts) blockedTypes.add("font");
        if (config.blockMedia) blockedTypes.add("media");

        await page.route("**/*", async (route) => {
            const request = route.request();
            const requestUrl = request.url();

            // Block specific URLs if configured
            const blockedUrl = config.blockUrls.find((prefix) =>
                requestUrl.startsWith(prefix),
            );
            if (blockedUrl !== undefined) {
                this.logger.debug(
                    `Blocked URL: ${requestUrl} (matches: ${blockedUrl})`,
                );
                await route.abort();
                return;
            }

            // Block URLs matching regex patterns if configured
            const regex = config.blockUrlsRegex.find((pattern) =>
                pattern.test(requestUrl),
            );
            if (regex !== undefined) {
                this.logger.debug(
                    `Blocked URL by regex: ${requestUrl} (pattern: ${regex.source})`,
                );
                await route.abort();
                return;
            }

            // Block images / stylesheets / fonts / media if enabled in config
            if (blockedTypes.has(request.resourceType())) {
                await route.abort();
                return;
            }

            // Allow all other requests
            await route.continue();
        });
    }

    /**
     * Tracks requests that were started but have neither finished nor failed.
     *
     * @returns a live array: entries are added on "request" and removed on
     * "requestfinished" / "requestfailed"
     */
    private setRequestLeakDetector(page: Page): LeakedRequests[] {
        const requests: LeakedRequests[] = [];
        // Keyed by the Request object rather than its URL, so two requests to
        // the same URL are tracked independently.
        const pending = new Map<Request, LeakedRequests>();

        page.on("request", (request: Request) => {
            const leakedRequest = new LeakedRequests();
            leakedRequest.url = request.url();
            leakedRequest.startTime = Date.now();
            pending.set(request, leakedRequest);
            requests.push(leakedRequest);
        });

        const settle = (request: Request): void => {
            const leakedRequest = pending.get(request);
            // Unknown request: nothing to remove. The unchecked
            // splice(findIndex(...), 1) used before removed the LAST entry
            // whenever the lookup returned -1.
            if (!leakedRequest) return;
            pending.delete(request);

            const index = requests.indexOf(leakedRequest);
            if (index !== -1) {
                requests.splice(index, 1);
            }
        };

        page.on("requestfinished", settle);
        page.on("requestfailed", settle);

        return requests;
    }

    /**
     * On a timeout error, stamps every still-pending request with its elapsed
     * time and logs the list; other errors are ignored here.
     */
    private checkAndLogLeakedRequests(
        requests: LeakedRequests[],
        error: unknown,
    ): void {
        if (!(error instanceof Error && error.name.startsWith("TimeoutError"))) {
            return;
        }

        const now = Date.now();
        for (const lreq of requests) {
            lreq.endTime = now;
            lreq.time = lreq.endTime - lreq.startTime;
        }
        this.logger.extraLogs(`Leaked requests`, LogLevels.ERROR, {
            requests: requests,
        });
    }
}
// --------------------------------------------------------------------------------