├── .github
│   └── workflows
│       └── release.yaml
├── .gitignore
├── .prettierrc
├── CHANGELOG.md
├── README.md
├── assets
│   └── logo.svg
├── build.config.ts
├── package-lock.json
├── package.json
├── src
│   ├── core.ts
│   ├── index.ts
│   ├── types
│   │   ├── Policy.ts
│   │   ├── UserAgent.ts
│   │   └── index.ts
│   └── utils
│       ├── index.ts
│       ├── size.ts
│       └── time.ts
└── tsconfig.json

/.github/workflows/release.yaml:
--------------------------------------------------------------------------------
name: Release

on:
  push:
    tags:
      - "v*.*.*"

jobs:
  publish:
    name: Publish
    runs-on: ubuntu-latest
    permissions:
      contents: write
      id-token: write
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Install Node.js
        uses: actions/setup-node@v4
        with:
          cache: npm
          node-version: 18
          registry-url: "https://registry.npmjs.org"

      - name: Install dependencies
        run: npm install

      - name: Publish
        run: npm publish --access public
        env:
          NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
          NPM_CONFIG_PROVENANCE: true

      - name: Generate changelog
        uses: hideoo/changelogithub-action@v1
        with:
          token: ${{ secrets.GITHUB_TOKEN }}
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# build output
dist/
# generated types
.astro/

# dependencies
node_modules/

# logs
npm-debug.log*
yarn-debug.log*
yarn-error.log*
pnpm-debug.log*

# environment variables
.env
.env.production

# macOS-specific files
.DS_Store
src/.DS_Store

# JetBrains settings folder
.idea/
--------------------------------------------------------------------------------
/.prettierrc:
--------------------------------------------------------------------------------
{
  "endOfLine": "auto"
}
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
# Changelog

This file documents all significant modifications made to the Astro integration `astro-robots`.

## 2.3.0

### Major Updates

- Updated `peerDependencies` to `"astro": "^4.0.0 || ^5.0.0"` for compatibility with Astro 5.0.0 and above.

## 2.2.0

### Major Updates

- Updates can now be previewed immediately by visiting `http://localhost:4321/robots.txt`, rather than waiting for the heavy build task to complete.

### Bug Fixes

- Handled the case where the output `dir` may not exist.

## 2.1.0

- Reduced and streamlined code logic.
- Downgraded the error thrown when `sitemap` is `true` but `AstroConfig.site` is not set to an ignorable warning.

## 2.0.1

- Updated the logger info output.

## 2.0.0

- Migrated to `AstroIntegrationLogger`.

## 1.0.2

### Patch Changes

- Bug: fixed an issue that prevented file size information from being read correctly when `output` is `server` or `hybrid` rather than `static`.

## 1.0.1

### Minor Changes

- Enhancements: added a warning for `robots.txt` files exceeding the 10 KB size limit.

## 1.0.0

### Major Changes

- Introduced the `measureExecutionTime` utility function.
- Introduced a `logger` object, ensuring compatibility with Astro 2.0 and the latest version 3.0.
- Overhauled the README file for maximum conciseness.

### Minor Changes

- Enhanced the logic of program execution.

### Patch Changes

- Implemented `UTF-8` writing to prevent potential file corruption issues.

## 0.8.21

### Patch Changes

- Added a README across Astro built-in integrations.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
**Latest Updates! 🎉 See the [change log](./CHANGELOG.md) for details.**

# astro-robots

A reliable `robots.txt` generator for Astro projects that simplifies SEO management, offering zero-config setup and [Verified Bots](https://radar.cloudflare.com/traffic/verified-bots) support.

[![Build Status](https://github.com/ACP-CODE/astro-robots/actions/workflows/release.yaml/badge.svg?style=flat-square)](https://github.com/ACP-CODE/astro-robots/actions/workflows/release.yaml)
![NPM Version](https://img.shields.io/npm/v/astro-robots?labelColor=black&color=light)
![NPM Downloads](https://img.shields.io/npm/d18m/astro-robots?labelColor=black)
## Installation

> This package is compatible with Astro 4.0.0 and later.

Quickly install with the `astro add` command:

```sh
npx astro add astro-robots
```

If you run into issues, try the [Manual Setup](#setup) guide.

## Usage
### Manual Setup

#### Setup

To install manually, run:

```sh
npm install astro-robots
```

Add the integration to your Astro config file (`astro.config.*`):

```ts
// @ts-check
import { defineConfig } from "astro/config";
import robots from "astro-robots"; // Add code manually

export default defineConfig({
  site: "https://example.com", // Provide this value if you want `@astrojs/sitemap` support
  integrations: [robots()], // Add code manually
});
```

After installing, run `npm run build` or `yarn build` in the terminal:

```sh
npm run build
```

This will output `robots.txt` to the `dist` folder with default rules:

```yaml
User-agent: *
Allow: /

# crawling rule(s) for above bots
Sitemap: https://example.com/sitemap-index.xml
```

#### Live Access

Start the server with `npm run dev`, then access the virtual `robots.txt` at `http://localhost:4321/robots.txt`.
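You can also verify the served output from the command line — a quick check, assuming the dev server is running on the default port `4321`:

```sh
curl http://localhost:4321/robots.txt
```

This should print the same rules that `npm run build` later writes to `dist/robots.txt`.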
### Getting Started with Reference

To configure the integration, pass an object to the `robots()` function in your `astro.config.*` file:

```ts
// @ts-check
import { defineConfig } from "astro/config";
import robots from "astro-robots";

export default defineConfig({
  //...
  integrations: [
    robots({
      host: "example.com",
      sitemap: [
        "https://example.com/sitemap.xml",
        "https://www.example.com/sitemap.xml",
      ],
      policy: [
        {
          userAgent: [
            "Applebot",
            "Googlebot",
            "bingbot",
            "Yandex",
            "Yeti",
            "Baiduspider",
            "360Spider",
            "*",
          ],
          allow: ["/"],
          disallow: ["/admin", "/login"],
          crawlDelay: 5,
          cleanParam: ["sid /", "s /forum/showthread"],
        },
        {
          userAgent: "BLEXBot",
          disallow: ["/assets", "/uploads/1989-08-21/*.jpg$"],
        },
      ],
    }),
  ],
});
```

With the above configuration, the generated `robots.txt` file will look like this:

```yaml
User-agent: Applebot
User-agent: Googlebot
User-agent: bingbot
User-agent: Yandex
User-agent: Yeti
User-agent: Baiduspider
User-agent: 360Spider
User-agent: *
Allow: /
Disallow: /admin
Disallow: /login
Crawl-delay: 5
Clean-param: sid /
Clean-param: s /forum/showthread

User-agent: BLEXBot
Disallow: /assets
Disallow: /uploads/1989-08-21/*.jpg$

# crawling rule(s) for above bots
Sitemap: https://example.com/sitemap.xml
Sitemap: https://www.example.com/sitemap.xml
Host: example.com
```

> **Note:** Some directives like `Host`, `Clean-param`, and `Crawl-delay` may not be supported by all crawlers. For example, Yandex has ignored `Crawl-delay` since February 2018. To control Yandex's crawl rate, use the [Site crawl rate setting](https://yandex.com/support/webmaster/service/crawl-rate.html#crawl-rate) in Yandex Webmaster.
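If you keep your canonical site URL in one place, you can derive `host` from it instead of hard-coding the domain. A minimal sketch, assuming a hypothetical `siteUrl` constant (the same expression appears in the option's inline docs):

```ts
// @ts-check
import { defineConfig } from "astro/config";
import robots from "astro-robots";

// Hypothetical single source of truth for the site's canonical URL
const siteUrl = "https://www.example.com";

export default defineConfig({
  site: siteUrl,
  integrations: [
    robots({
      // Strip the scheme and any port number, leaving a bare domain name
      host: siteUrl.replace(/^https?:\/\/|:\d+/g, ""), // → "www.example.com"
    }),
  ],
});
```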
## Contributing

Submit your issues or feedback on our [GitHub](https://github.com/ACP-CODE/astro-robots/issues) channel.

## License

MIT
--------------------------------------------------------------------------------
/assets/logo.svg:
--------------------------------------------------------------------------------
(SVG markup not preserved in this dump; the graphic is titled "icon".)
--------------------------------------------------------------------------------
/build.config.ts:
--------------------------------------------------------------------------------
import { defineBuildConfig } from "unbuild";

export default defineBuildConfig({
  entries: ["./src/index"],
  declaration: true,
  rollup: {
    emitCJS: false,
  },
});
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
{
  "name": "astro-robots",
  "version": "2.3.1",
  "description": "A reliable robots.txt generator for Astro projects, offering zero-config setup and Verified Bots support.",
  "type": "module",
  "main": "./dist/index.mjs",
  "module": "./dist/index.mjs",
  "types": "./dist/index.d.ts",
  "exports": {
    ".": {
      "types": "./dist/index.d.ts",
      "import": "./dist/index.mjs"
    }
  },
  "files": [
    "dist/index.d.ts",
    "dist/index.mjs"
  ],
  "scripts": {
    "fmt": "prettier --write .",
    "build": "unbuild",
    "dev": "unbuild --watch",
    "prepublishOnly": "npm run build"
  },
  "keywords": [
    "astro-integration",
    "robots.txt",
    "seo assistant",
    "verified bots",
    "verified robots",
    "robots",
    "robotsgenerator"
  ],
  "repository": {
    "type": "git",
    "url": "git+https://github.com/ACP-CODE/astro-robots.git"
  },
  "author": "Junlin",
  "license": "ISC",
  "bugs": {
    "url": "https://github.com/ACP-CODE/astro-robots/issues"
  },
  "homepage": "https://github.com/ACP-CODE/astro-robots#readme",
  "devDependencies": {
    "@types/node": "^22.10.1",
    "astro": "^4.0.3",
    "prettier": "^3.4.1",
    "unbuild": "^2.0.0"
  },
  "peerDependencies": {
    "astro": ">= 4.0.0"
  }
}
--------------------------------------------------------------------------------
/src/core.ts:
--------------------------------------------------------------------------------
import type { RobotsOptions } from ".";
import type { Policy } from "./types";
import type { AstroIntegrationLogger } from "astro";
import path from "node:path";

function validatePolicy(policy: Policy[], logger: AstroIntegrationLogger) {
  if (policy.length === 0) {
    throwMsg("Policy array must contain at least one entry.", "error", logger);
  }

  policy.forEach((entry, index) => {
    if (!entry.userAgent) {
      throwMsg(`policy[${index}].userAgent is required.`, "error", logger);
    }

    if (
      (!entry.allow && !entry.disallow) ||
      (entry.allow?.length === 0 && entry.disallow?.length === 0)
    ) {
      throwMsg(
        `policy[${index}] must have at least one 'disallow' or 'allow' entry.`,
        "error",
        logger,
      );
    }

    if (entry.crawlDelay) {
      if (
        typeof entry.crawlDelay !== "number" ||
        entry.crawlDelay < 0.1 ||
        entry.crawlDelay > 60
      ) {
        throwMsg(
          `policy[${index}].crawlDelay must be between 0.1 and 60 seconds.`,
"error", 36 | logger, 37 | ); 38 | } 39 | } 40 | }); 41 | } 42 | 43 | function generatePolicyContent(policy: Policy[]): string { 44 | return policy 45 | .map((rule) => { 46 | let content = `User-agent: ${Array.isArray(rule.userAgent) ? rule.userAgent.join("\nUser-agent: ") : rule.userAgent || "*"}\n`; 47 | 48 | if (rule.allow) { 49 | const allowPaths = Array.isArray(rule.allow) 50 | ? rule.allow 51 | : [rule.allow]; 52 | allowPaths.forEach((path: any) => (content += `Allow: ${path}\n`)); 53 | } 54 | 55 | if (rule.disallow) { 56 | const disallowPaths = Array.isArray(rule.disallow) 57 | ? rule.disallow 58 | : [rule.disallow]; 59 | disallowPaths.forEach( 60 | (path: any) => (content += `Disallow: ${path}\n`), 61 | ); 62 | } 63 | 64 | if (rule.crawlDelay) content += `Crawl-delay: ${rule.crawlDelay}\n`; 65 | 66 | if (rule.cleanParam) { 67 | const cleanParams = Array.isArray(rule.cleanParam) 68 | ? rule.cleanParam 69 | : [rule.cleanParam]; 70 | cleanParams.forEach( 71 | (param: any) => (content += `Clean-param: ${param}\n`), 72 | ); 73 | } 74 | 75 | return content; 76 | }) 77 | .join("\n"); 78 | } 79 | 80 | function validateSitemapUrl(url: string): boolean { 81 | const urlPattern = 82 | /^(https?:\/\/)[^\s/$.?#].[^\s]*\.(xml|xml\.gz|txt|txt\.gz|json|xhtml)$/i; 83 | return urlPattern.test(url); 84 | } 85 | 86 | function generateSitemapContent( 87 | options: RobotsOptions, 88 | siteHref: string, 89 | logger: AstroIntegrationLogger, 90 | ): string { 91 | if (!options.sitemap) return ""; 92 | 93 | if (options.sitemap === true) { 94 | if (siteHref) { 95 | const defaultSitemap = `${siteHref}sitemap-index.xml`; 96 | return `Sitemap: ${defaultSitemap}`; 97 | } else { 98 | logger.warn( 99 | "`sitemap` is configured as true, but `AstroConfig.site` is not provided. Failed to generate default sitemap URL.", 100 | ); 101 | return ""; 102 | } 103 | } 104 | 105 | const sitemaps = Array.isArray(options.sitemap) 106 | ? options.sitemap 107 | : [options.sitemap]; 108 | const validSitemaps = sitemaps.filter((url) => { 109 | if (validateSitemapUrl(url)) { 110 | return true; 111 | } else { 112 | logger.warn(`\x1b[33mInvalid Sitemap URL: ${url}\x1b[33m`); 113 | return false; 114 | } 115 | }); 116 | 117 | return validSitemaps.map((url) => `Sitemap: ${url}`).join("\n") + "\n"; 118 | } 119 | 120 | function validateHost(host: string, logger: AstroIntegrationLogger) { 121 | const hostPattern = 122 | /^(?=.{1,253}$)(?:(?!-)[a-zA-Z0-9-]{1,63}(? 
logger.warn(`\x1b[33mWarning: ${msg}\x1b[0m`), 151 | error: () => { 152 | logger.error(`\x1b[31m${msg}\x1b[0m`); 153 | throw new Error(msg); 154 | }, 155 | }; 156 | 157 | formattedMsg[type]?.(); 158 | } 159 | 160 | export function generate( 161 | options: RobotsOptions, 162 | siteMapHref: string, 163 | logger: AstroIntegrationLogger, 164 | ): string { 165 | if (!options.policy || options.policy.length === 0) { 166 | throwMsg("Policy configuration is required.", "error", logger); 167 | } 168 | 169 | validatePolicy(options.policy, logger); 170 | 171 | const content = [ 172 | generatePolicyContent(options.policy), 173 | generateSitemapContent(options, siteMapHref, logger), 174 | generateHostContent(options, logger), 175 | ].join("\n"); 176 | 177 | return content.trim(); 178 | } 179 | 180 | export function logInfo( 181 | fileSize: number, 182 | logger: AstroIntegrationLogger, 183 | destDir: string, 184 | ) { 185 | const sizeLimit = 10; 186 | if (fileSize > sizeLimit) { 187 | console.log( 188 | `\n\x1b[42m\x1b[30m Generating 'robots.txt' file \x1b[39m\x1b[0m`, 189 | ); 190 | const warnMsg = [ 191 | `\x1b[33m(!) 'robots.txt' file size is ${fileSize} KB.`, 192 | `- Keep it under ${sizeLimit} KB for best results.\x1b[0m\n`, 193 | ]; 194 | console.log(warnMsg.join("\n")); 195 | } 196 | 197 | logger.info( 198 | `\`robots.txt\` (${fileSize}KB) created at \`${path.relative(process.cwd(), destDir)}\``, 199 | ); 200 | } 201 | -------------------------------------------------------------------------------- /src/index.ts: -------------------------------------------------------------------------------- 1 | import type { AstroConfig, AstroIntegration } from "astro"; 2 | import type { Policy } from "./types"; 3 | import fs from "fs/promises"; 4 | import { fileURLToPath } from "node:url"; 5 | import { generate, logInfo } from "./core"; 6 | import { measureExecutionTime, getFileSizeInKilobytes } from "./utils"; 7 | 8 | export interface RobotsOptions { 9 | /** 10 | * @description 11 | * Used to specify rules that apply to one or more robots. 12 | * @default 13 | * All robots are allowed. 14 | * ```ts 15 | * policy:[ 16 | * { 17 | * userAgent: "*", 18 | * allow: "/" 19 | * } 20 | * ] 21 | * ``` 22 | * For more help, refer to [SYNTAX](https://yandex.com/support/webmaster/controlling-robot/robots-txt.html#recommend) by Yandex. 23 | */ 24 | policy: Policy[]; 25 | /** 26 | * @description 27 | * The location of a sitemap for this website. 28 | * @example 29 | * ```ts 30 | * sitemap: [ 31 | * "https://example.com/sitemap.xml", 32 | * "https://www.example.com/sitemap.xml" 33 | * ] 34 | * ``` 35 | * The value of the [SITEMAP](https://developers.google.com/search/docs/crawling-indexing/robots/robots_txt#sitemap) 36 | * field is case-sensitive. 37 | */ 38 | sitemap?: boolean | string | string[]; 39 | /** 40 | * @default null 41 | * @description 42 | * Specify the value of `Host`, some crawlers(Yandex) support and only accept domain names. 
43 | * @example 44 | * ```ts 45 | * host: siteUrl.replace(/^https?:\/\/|:\d+/g, "") 46 | * ``` 47 | */ 48 | host?: null | string; 49 | } 50 | 51 | const defaults: RobotsOptions = { 52 | policy: [ 53 | { 54 | userAgent: "*", 55 | allow: "/", 56 | }, 57 | ], 58 | sitemap: true, 59 | host: null, 60 | }; 61 | 62 | export default function robots(options?: RobotsOptions): AstroIntegration { 63 | let config: AstroConfig; 64 | let finalSiteMapHref: string; 65 | 66 | const filename = "robots.txt"; 67 | const opts = { ...defaults, ...options }; 68 | 69 | return { 70 | name: "astro-robots", 71 | hooks: { 72 | "astro:config:setup": ({ config: cfg, }) => { 73 | config = cfg; 74 | if (config.site) { 75 | finalSiteMapHref = new URL(config.base, config.site).href; 76 | } 77 | }, 78 | "astro:server:setup": ({ server, logger }) => { 79 | server.middlewares.use(async (req, res, next) => { 80 | if (req.url?.startsWith(`/${filename}`)) { 81 | res.setHeader("Content-Type", "text/plain"); 82 | res.setHeader("Cache-Control", "no-cache"); 83 | res.end(generate(opts, finalSiteMapHref, logger)); 84 | } else { 85 | next(); 86 | } 87 | }); 88 | }, 89 | "astro:build:done": async ({ dir, logger }) => { 90 | const fileURL = new URL(filename, dir); 91 | const destDir = fileURLToPath(dir); 92 | const fileBuffer = generate(opts, finalSiteMapHref, logger); 93 | 94 | try { 95 | await fs.mkdir(destDir, { recursive: true }); 96 | await fs.writeFile(fileURL, fileBuffer, "utf-8"); 97 | throw "done"; 98 | } catch (e) { 99 | if( e === "done") { 100 | const fileSize = getFileSizeInKilobytes(fileBuffer); 101 | logInfo(fileSize, logger, destDir); 102 | } else { 103 | throw e; 104 | } 105 | } 106 | }, 107 | }, 108 | }; 109 | } 110 | -------------------------------------------------------------------------------- /src/types/Policy.ts: -------------------------------------------------------------------------------- 1 | import type { UserAgent } from "./UserAgent"; 2 | 3 | export interface Policy { 4 | /** 5 | * @description 6 | * Indicates the robot to which the rules listed in "robots.txt" apply. 7 | * @example 8 | * ```ts 9 | * policy:[ 10 | * { 11 | * userAgent: [ 12 | * 'Googlebot', 13 | * 'Applebot', 14 | * 'Baiduspider', 15 | * 'bingbot' 16 | * ], 17 | * // crawling rule(s) for above bots 18 | * } 19 | * ] 20 | * ``` 21 | * Verified bots, refer to [DITIG](https://www.ditig.com/robots-txt-template#regular-template) or [Cloudflare Radar](https://radar.cloudflare.com/traffic/verified-bots). 22 | */ 23 | userAgent: UserAgent | UserAgent[]; 24 | /** 25 | * @description 26 | * [ At least one or more `allow` or `disallow` entries per rule ] Allows indexing site sections or individual pages. 27 | * @example 28 | * ```ts 29 | * policy:[{allow:["/"]}] 30 | * ``` 31 | * Path-based URL matching, refer to [SYNTAX](https://developers.google.com/search/docs/crawling-indexing/robots/robots_txt#url-matching-based-on-path-values) via Google. 32 | */ 33 | allow?: string | string[]; 34 | /** 35 | * @description 36 | * [ At least one or more `disallow` or `allow` entries per rule ] Prohibits indexing site sections or individual pages. 37 | * @example 38 | * ```ts 39 | * policy:[ 40 | * { 41 | * disallow:[ 42 | * "/admin", 43 | * "/uploads/1989-08-21/*.jpg$" 44 | * ] 45 | * } 46 | * ] 47 | * ``` 48 | * Path-based URL matching, refer to [SYNTAX](https://developers.google.com/search/docs/crawling-indexing/robots/robots_txt#url-matching-based-on-path-values) via Google. 
   */
  disallow?: string | string[];
  /**
   * @description
   * Specifies the minimum interval (in seconds) for the search robot to wait after loading one page before starting to load the next.
   *
   * @example
   * ```ts
   * policy: [{ crawlDelay: 5 }]
   * ```
   * About the [Crawl-delay](https://yandex.com/support/webmaster/robot-workings/crawl-delay.html#crawl-delay) directive.
   */
  crawlDelay?: number;
  /**
   * @description
   * Indicates to the robot that the page URL contains parameters (like UTM tags) that should be ignored when indexing it.
   *
   * @example
   * ```shell
   * # for URLs like:
   * www.example2.com/index.php?page=1&sid=2564126ebdec301c607e5df
   * www.example2.com/index.php?page=1&sid=974017dcd170d6c4a5d76ae
   * ```
   *
   * ```js
   * policy: [
   *   {
   *     cleanParam: [
   *       "sid /index.php",
   *     ]
   *   }
   * ]
   * ```
   * For additional examples, see the
   * Yandex [SYNTAX](https://yandex.com/support/webmaster/robot-workings/clean-param.html#clean-param__additional) guide.
   */
  cleanParam?: string | string[];
}
--------------------------------------------------------------------------------
/src/types/UserAgent.ts:
--------------------------------------------------------------------------------
// Updates can be retrieved from: https://www.ditig.com/robots-txt-template
// Last update: 2023-03-15

export type SearchEngines = {
  // so.com chinese search engine
  360: "360Spider" | "360Spider-Image" | "360Spider-Video" | "HaoSouSpider";
  // apple.com search engine
  Apple: "Applebot" | "AppleNewsBot";
  // baidu.com chinese search engine
  Baidu:
    | "Baiduspider"
    | "Baiduspider-image"
    | "Baiduspider-mobile"
    | "Baiduspider-news"
    | "Baiduspider-video";
  // bing.com international search engine
  Bing:
    | "bingbot"
    | "BingPreview"
    | "msnbot"
    | "msnbot-media"
    | "adidxbot"
    | "MSN";
  // bublup.com suggestion/search engine
  Bublup: "BublupBot";
  // cliqz.com german in-product search engine
  Cliqz: "Cliqzbot";
  // coccoc.com vietnamese search engine
  Coccoc: "coccoc" | "coccocbot-image" | "coccocbot-web";
  // daum.net korean search engine
  Daumoa: "Daumoa";
  // dazoo.fr french search engine
  Dazoo: "DeuSu";
  // duckduckgo.com international privacy search engine
  Duckduckgo: "DuckDuckBot" | "DuckDuckGo-Favicons-Bot";
  // eurip.com european search engine
  Eurip: "EuripBot";
  // exploratodo.com latin search engine
  Exploratodo: "Exploratodo";
  // findx.com european search engine
  Findx: "Findxbot";
  // goo.ne.jp japanese search engine
  Goo: "gooblog" | "ichiro";
  // google.com international search engine
  Google:
    | "Googlebot"
    | "Googlebot-Image"
    | "Googlebot-Mobile"
    | "Googlebot-News"
    | "Googlebot-Video"
    | "Mediapartners-Google"
    | "AdsBot-Google"
    | "AdsBot-Google-Mobile"
    | "AdsBot-Google-Mobile-Apps"
    | "Storebot-Google"
    | "Google-InspectionTool"
    | "FeedFetcher-Google";
  // istella.it italian search engine
  Istella: "istellabot";
  // jike.com / chinaso.com chinese search engine
  Jike: "JikeSpider";
  // lycos.com & hotbot.com international search engine
  Lycos: "Lycos";
  // mail.ru russian search engine
  Mail: "Mail.Ru";
  // mojeek.com search engine
  Mojeek: "MojeekBot";
  // orange.com international search engine
  Orange: "OrangeBot";
  // botje.nl dutch search engine
  Botje: "Plukkie";
  // qwant.com french search engine
  Qwant: "Qwantify";
  // rambler.ru russian search engine
  Rambler: "Rambler";
  // seznam.cz czech search engine
  Seznam: "SeznamBot";
  // soso.com chinese search engine
  Soso: "Sosospider";
  // yahoo.com international search engine
  Yahoo: "Slurp";
  // sogou.com chinese search engine
  Sogou:
    | "Sogou blog"
    | "Sogou inst spider"
    | "Sogou News Spider"
    | "Sogou Orion spider"
    | "Sogou spider2"
    | "Sogou web spider";
  // sputnik.ru russian search engine
  Sputnik: "SputnikBot";
  // ask.com international search engine
  Ask: "Teoma";
  // wotbox.com international search engine
  Wotbox: "wotbox";
  // yandex.com russian search engine
  Yandex: "Yandex" | "YandexMobileBot";
  // search.naver.com south korean search engine
  Naver: "Yeti";
  // yioop.com international search engine
  Yioop: "YioopBot";
  // yooz.ir iranian search engine
  Yooz: "yoozBot";
  // youdao.com chinese search engine
  Youdao: "YoudaoBot";
};

export type SocialNetwork = {
  // facebook.com social network
  Facebook: "facebookcatalog" | "facebookexternalhit" | "Facebot";
  // pinterest.com social network
  Pinterest: "Pinterest";
  // twitter.com social media bot
  Twitter: "Twitterbot";
  // whatsapp.com preview bot
  WhatsApp: "WhatsApp";
  // linkedin.com search engine crawler
  LinkedIn: "LinkedInBot";
};

export type SearchEngineOptimization = {
  Ahrefs: "AhrefsBot";
  Moz: "Moz dotbot" | "Moz rogerbot";
  WebMeUp: "BLEXBot";
  Botify: "Botify";
  Babbar: "Barkrowler";
  SEMrush: "SEMrush" | "SemrushBotSI";
  Cxense: "Cxense";
  EzoicInc: "EzoicBot";
  DataForSEO: "DataForSEO";
  PrerenderLLC: "prerender";
};

export type UserAgent =
  | "*"
  | SearchEngines[keyof SearchEngines]
  | SocialNetwork[keyof SocialNetwork]
  | SearchEngineOptimization[keyof SearchEngineOptimization];
--------------------------------------------------------------------------------
/src/types/index.ts:
--------------------------------------------------------------------------------
export type { Policy } from "./Policy";
--------------------------------------------------------------------------------
/src/utils/index.ts:
--------------------------------------------------------------------------------
export { getFileSizeInKilobytes } from "./size";
export { measureExecutionTime } from "./time";
--------------------------------------------------------------------------------
/src/utils/size.ts:
--------------------------------------------------------------------------------
export function getFileSizeInKilobytes(fileBuffer: string): number {
  // Byte length of the UTF-8 string, converted to kilobytes
  return Buffer.byteLength(fileBuffer, "utf8") / 1024;
}
--------------------------------------------------------------------------------
/src/utils/time.ts:
--------------------------------------------------------------------------------
import { performance } from "node:perf_hooks";

export function measureExecutionTime(callback: () => void): number {
  const startTime = performance.now();
  callback();
  const endTime = performance.now();
  const executionTime = Math.floor(endTime - startTime);
  return executionTime;
}
--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
{
  "extends": "astro/tsconfigs/strict"
}
--------------------------------------------------------------------------------