├── .github
│   └── workflows
│       └── release.yaml
├── .gitignore
├── .prettierrc
├── CHANGELOG.md
├── README.md
├── assets
│   └── logo.svg
├── build.config.ts
├── package-lock.json
├── package.json
├── src
│   ├── core.ts
│   ├── index.ts
│   ├── types
│   │   ├── Policy.ts
│   │   ├── UserAgent.ts
│   │   └── index.ts
│   └── utils
│       ├── index.ts
│       ├── size.ts
│       └── time.ts
└── tsconfig.json
/.github/workflows/release.yaml:
--------------------------------------------------------------------------------
1 | name: Release
2 |
3 | on:
4 | push:
5 | tags:
6 | - "v*.*.*"
7 |
8 | jobs:
9 | publish:
10 | name: Publish
11 | runs-on: ubuntu-latest
12 | permissions:
13 | contents: write
14 | id-token: write
15 | steps:
16 | - name: Checkout
17 | uses: actions/checkout@v4
18 | with:
19 | fetch-depth: 0
20 |
21 | - name: Install Node.js
22 | uses: actions/setup-node@v4
23 | with:
24 | cache: npm
25 | node-version: 18
26 | registry-url: "https://registry.npmjs.org"
27 |
28 | - name: Install dependencies
29 | run: npm install
30 |
31 | - name: Publish
32 | run: npm publish --access public
33 | env:
34 | NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
35 | NPM_CONFIG_PROVENANCE: true
36 |
37 | - name: Generate changelog
38 | uses: hideoo/changelogithub-action@v1
39 | with:
40 | token: ${{ secrets.GITHUB_TOKEN }}
41 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # build output
2 | dist/
3 | # generated types
4 | .astro/
5 |
6 | # dependencies
7 | node_modules/
8 |
9 | # logs
10 | npm-debug.log*
11 | yarn-debug.log*
12 | yarn-error.log*
13 | pnpm-debug.log*
14 |
15 |
16 | # environment variables
17 | .env
18 | .env.production
19 |
20 | # macOS-specific files
21 | .DS_Store
22 |
23 | # jetbrains setting folder
24 | .idea/
25 | src/.DS_Store
26 |
--------------------------------------------------------------------------------
/.prettierrc:
--------------------------------------------------------------------------------
1 | {
2 | "endOfLine": "auto"
3 | }
4 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Changelog
2 |
3 | This file documents all significant modifications made to the Astro Integration `astro-robots`.
4 |
5 | ## 2.3.0
6 |
7 | ### Major Updates
8 |
9 | - **`peerDependencies`**: updated to `"astro": "^4.0.0 || ^5.0.0"`, making the integration compatible with Astro 5.0.0 and above.
10 |
11 | ## 2.2.0
12 |
13 | ### Major Updates
14 |
15 | - Updates can now be previewed immediately by visiting `http://localhost:4321/robots.txt`, rather than waiting for the heavy build task to complete.
16 |
17 | ### Bug Fixes
18 |
19 | - Handle the case where the output `dir` may not exist.
20 |
21 | ## 2.1.0
22 |
23 | - Reduce and adjust code logic
24 | - Downgrade the error raised when `sitemap` is `true` but `AstroConfig.site` is not set to an ignorable warning.
25 |
26 | ## 2.0.1
27 |
28 | - Update the `logInfo` output.
29 |
30 | ## 2.0.0
31 |
32 | - Update to AstroIntegrationLogger.
33 |
34 | ## 1.0.2
35 |
36 | ### Patch Changes
37 |
38 | - Bug fix: resolved an issue that prevented correctly reading the file size when `output` is `server` or `hybrid` rather than `static`.
39 |
40 | ## 1.0.1
41 |
42 | ### Minor Changes
43 |
44 | - Enhancements: added a warning for `robots.txt` files exceeding the 10 KB size limit.
45 |
46 | ## 1.0.0
47 |
48 | ### Major Changes
49 |
50 | - Introduced `measureExecutionTime` utility function.
51 | - Introduced a `logger` object, ensuring compatibility with Astro 2.0 and the latest version 3.0.
52 | - Overhauled the README file with the aim of achieving maximum conciseness.
53 |
54 | ### Minor Changes
55 |
56 | - Enhanced the logic of program execution.
57 |
58 | ### Patch Changes
59 |
60 | - Implemented `UTF-8` writing to prevent potential file corruption issues.
61 |
62 | ## 0.8.21
63 |
64 | ### Patch Changes
65 |
66 | - Add README across Astro built-in integrations
67 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | **Latest Updates! 🎉 See the [change log](./CHANGELOG.md) for details.**
4 |
5 |
6 |
7 | # astro-robots
8 |
9 | It simplifies SEO management with a reliable robots.txt generator for Astro projects, offering zero-config setup and [Verified Bots](https://radar.cloudflare.com/traffic/verified-bots) support.
10 |
11 | [](https://github.com/ACP-CODE/astro-robots/actions/workflows/release.yaml)
12 | 
13 | 
14 |
15 |
16 |
17 | ## Installation
18 |
19 | > The package is compatible with Astro 4.0.0 and later.
20 |
21 | Quickly install with the `astro add` command:
22 |
23 | ```sh
24 | npx astro add astro-robots
25 | ```
26 |
27 | If you run into issues, try the [Manual Setup](#setup) guide.
28 |
29 | ## Usage
30 |
31 |
32 | Manual Setup
33 |
34 | #### Setup
35 |
36 | To install manually, run:
37 |
38 | ```sh
39 | npm install astro-robots
40 | ```
41 |
42 | Add the integration to your Astro config file (`astro.config.*`):
43 |
44 | ```ts
45 | // @ts-check
46 | import { defineConfig } from "astro/config";
47 | import robots from "astro-robots"; // Add code manually
48 |
49 | export default defineConfig({
50 | site: "https://example.com", // If you want to support `@astrojs/sitemap`, please provide this value
51 | integrations: [robots()], // Add code manually
52 | });
53 | ```
54 |
55 | After installing, run `npm run build` or `yarn build` in terminal:
56 |
57 | ```sh
58 | npm run build
59 | ```
60 |
61 | This will output `robots.txt` to the `dist` folder with default rules:
62 |
63 | ```yaml
64 | User-agent: *
65 | Allow: /
66 |
67 | # crawling rule(s) for above bots
68 | Sitemap: https://example.com/sitemap-index.xml
69 | ```
70 |
71 | #### Live Access
72 |
73 | Start the server with `npm run dev`, then access the virtual `robots.txt` at `http://localhost:4321/robots.txt`.
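
For a quick check, you can fetch the file from a second terminal once the dev server is running (a minimal sketch, assuming the default dev port `4321` and that `curl` is available):

```sh
npm run dev
# in another terminal
curl http://localhost:4321/robots.txt
```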
74 |
75 |
76 |
77 |
78 |
79 |
80 | Getting Started with Reference
81 |
82 | To configure the integration, pass an object to the `robots()` function in your `astro.config.*` file:
83 |
84 | ```ts
85 | // @ts-check
86 | import { defineConfig } from "astro/config";
87 | import robots from "astro-robots";
88 |
89 | export default defineConfig({
90 | //...
91 | integrations: [
92 | robots({
93 | host: "example.com",
94 | sitemap: [
95 | "https://example.com/sitemap.xml",
96 | "https://www.example.com/sitemap.xml",
97 | ],
98 | policy: [
99 | {
100 | userAgent: [
101 | "Applebot",
102 | "Googlebot",
103 | "bingbot",
104 | "Yandex",
105 | "Yeti",
106 | "Baiduspider",
107 | "360Spider",
108 | "*",
109 | ],
110 | allow: ["/"],
111 | disallow: ["/admin", "/login"],
112 | crawlDelay: 5,
113 | cleanParam: ["sid /", "s /forum/showthread"],
114 | },
115 | {
116 | userAgent: "BLEXBot",
117 | disallow: ["/assets", "/uploads/1989-08-21/*.jpg$"],
118 | },
119 | ],
120 | }),
121 | ],
122 | });
123 | ```
124 |
125 | With the above configuration, the generated `robots.txt` file will look like this:
126 |
127 | ```yaml
128 | User-agent: Applebot
129 | User-agent: Googlebot
130 | User-agent: bingbot
131 | User-agent: Yandex
132 | User-agent: Yeti
133 | User-agent: Baiduspider
134 | User-agent: 360Spider
135 | User-agent: *
136 | Allow: /
137 | Disallow: /admin
138 | Disallow: /login
139 | Crawl-delay: 5
140 | Clean-param: sid /
141 | Clean-param: s /forum/showthread
142 |
143 | User-agent: BLEXBot
144 | Disallow: /assets
145 | Disallow: /uploads/1989-08-21/*.jpg$
146 |
147 | # crawling rule(s) for above bots
148 | Sitemap: https://example.com/sitemap.xml
149 | Sitemap: https://www.example.com/sitemap.xml
150 | Host: example.com
151 | ```
152 |
153 | > **Note:** Some directives like `Host`, `Clean-param`, and `Crawl-delay` may not be supported by all crawlers. For example, Yandex has ignored `Crawl-delay` since February 2018. To control Yandex's crawl rate, use the [Site crawl rate setting](https://yandex.com/support/webmaster/service/crawl-rate.html#crawl-rate) in Yandex Webmaster.
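
One way to act on this is to give Yandex its own policy entry without `crawlDelay`, keeping the delay only for user agents that still honor it (a hypothetical sketch using the options documented above):

```ts
import robots from "astro-robots";

// Hypothetical split: Yandex ignores `Crawl-delay`, so omit it there
// and keep it only for the catch-all rule.
robots({
  policy: [
    { userAgent: "Yandex", allow: ["/"] },
    { userAgent: "*", allow: ["/"], crawlDelay: 5 },
  ],
});
```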
154 |
155 |
156 |
157 |
158 | ## Contributing
159 |
160 | Submit your issues or feedback on our [GitHub](https://github.com/ACP-CODE/astro-robots/issues) channel.
161 |
162 | ## License
163 |
164 | MIT
165 |
--------------------------------------------------------------------------------
/assets/logo.svg:
--------------------------------------------------------------------------------
1 |
2 |
10 |
--------------------------------------------------------------------------------
/build.config.ts:
--------------------------------------------------------------------------------
1 | import { defineBuildConfig } from "unbuild";
2 |
3 | export default defineBuildConfig({
4 | entries: ["./src/index"],
5 | declaration: true,
6 | rollup: {
7 | emitCJS: false,
8 | },
9 | });
10 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "astro-robots",
3 | "version": "2.3.1",
4 | "description": "A reliable robots.txt generator for Astro projects, offering zero-config setup and Verified Bots support.",
5 | "type": "module",
6 | "main": "./dist/index.mjs",
7 | "module": "./dist/index.mjs",
8 | "types": "./dist/index.d.ts",
9 | "exports": {
10 | ".": {
11 | "types": "./dist/index.d.ts",
12 | "import": "./dist/index.mjs"
13 | }
14 | },
15 | "files": [
16 | "dist/index.d.ts",
17 | "dist/index.mjs"
18 | ],
19 | "scripts": {
20 | "fmt": "prettier --write .",
21 | "build": "unbuild",
22 | "dev": "unbuild --watch",
23 | "prepublishOnly": "npm run build"
24 | },
25 | "keywords": [
26 | "astro-integration",
27 | "robots.txt",
28 | "seo assistant",
29 | "verified bots",
30 | "verified robots",
31 | "robots",
32 | "robotsgenerator"
33 | ],
34 | "repository": {
35 | "type": "git",
36 | "url": "git+https://github.com/ACP-CODE/astro-robots.git"
37 | },
38 | "author": "Junlin",
39 | "license": "ISC",
40 | "bugs": {
41 | "url": "https://github.com/ACP-CODE/astro-robots/issues"
42 | },
43 | "homepage": "https://github.com/ACP-CODE/astro-robots#readme",
44 | "devDependencies": {
45 | "@types/node": "^22.10.1",
46 | "astro": "^4.0.3",
47 | "prettier": "^3.4.1",
48 | "unbuild": "^2.0.0"
49 | },
50 | "peerDependencies": {
51 | "astro": ">= 4.0.0"
52 | }
53 | }
54 |
--------------------------------------------------------------------------------
/src/core.ts:
--------------------------------------------------------------------------------
1 | import type { RobotsOptions } from ".";
2 | import type { Policy } from "./types";
3 | import type { AstroIntegrationLogger } from "astro";
4 | import path from "node:path";
5 |
6 | function validatePolicy(policy: Policy[], logger: AstroIntegrationLogger) {
7 | if (policy.length === 0) {
8 | throwMsg("Policy array must contain at least one entry.", "error", logger);
9 | }
10 |
11 | policy.forEach((policy, index) => {
12 | if (!policy.userAgent) {
13 | throwMsg(`policy[${index}].userAgent is required.`, "error", logger);
14 | }
15 |
16 | if (
17 | (!policy.allow && !policy.disallow) ||
18 | (policy.allow?.length === 0 && policy.disallow?.length === 0)
19 | ) {
20 | throwMsg(
21 | `policy[${index}] must have at least one 'disallow' or 'allow' entry.`,
22 | "error",
23 | logger,
24 | );
25 | }
26 |
27 | if (policy.crawlDelay) {
28 | if (
29 | typeof policy.crawlDelay !== "number" ||
30 | policy.crawlDelay < 0.1 ||
31 | policy.crawlDelay > 60
32 | ) {
33 | throwMsg(
34 | `policy[${index}].crawlDelay must be between 0.1 and 60 seconds.`,
35 | "error",
36 | logger,
37 | );
38 | }
39 | }
40 | });
41 | }
42 |
43 | function generatePolicyContent(policy: Policy[]): string {
44 | return policy
45 | .map((rule) => {
46 | let content = `User-agent: ${Array.isArray(rule.userAgent) ? rule.userAgent.join("\nUser-agent: ") : rule.userAgent || "*"}\n`;
47 |
48 | if (rule.allow) {
49 | const allowPaths = Array.isArray(rule.allow)
50 | ? rule.allow
51 | : [rule.allow];
52 | allowPaths.forEach((path: any) => (content += `Allow: ${path}\n`));
53 | }
54 |
55 | if (rule.disallow) {
56 | const disallowPaths = Array.isArray(rule.disallow)
57 | ? rule.disallow
58 | : [rule.disallow];
59 | disallowPaths.forEach(
60 | (path: any) => (content += `Disallow: ${path}\n`),
61 | );
62 | }
63 |
64 | if (rule.crawlDelay) content += `Crawl-delay: ${rule.crawlDelay}\n`;
65 |
66 | if (rule.cleanParam) {
67 | const cleanParams = Array.isArray(rule.cleanParam)
68 | ? rule.cleanParam
69 | : [rule.cleanParam];
70 | cleanParams.forEach(
71 | (param: any) => (content += `Clean-param: ${param}\n`),
72 | );
73 | }
74 |
75 | return content;
76 | })
77 | .join("\n");
78 | }
79 |
80 | function validateSitemapUrl(url: string): boolean {
81 | const urlPattern =
82 | /^(https?:\/\/)[^\s/$.?#].[^\s]*\.(xml|xml\.gz|txt|txt\.gz|json|xhtml)$/i;
83 | return urlPattern.test(url);
84 | }
85 |
86 | function generateSitemapContent(
87 | options: RobotsOptions,
88 | siteHref: string,
89 | logger: AstroIntegrationLogger,
90 | ): string {
91 | if (!options.sitemap) return "";
92 |
93 | if (options.sitemap === true) {
94 | if (siteHref) {
95 | const defaultSitemap = `${siteHref}sitemap-index.xml`;
96 | return `Sitemap: ${defaultSitemap}`;
97 | } else {
98 | logger.warn(
99 | "`sitemap` is configured as true, but `AstroConfig.site` is not provided. Failed to generate default sitemap URL.",
100 | );
101 | return "";
102 | }
103 | }
104 |
105 | const sitemaps = Array.isArray(options.sitemap)
106 | ? options.sitemap
107 | : [options.sitemap];
108 | const validSitemaps = sitemaps.filter((url) => {
109 | if (validateSitemapUrl(url)) {
110 | return true;
111 | } else {
112 | logger.warn(`\x1b[33mInvalid Sitemap URL: ${url}\x1b[0m`);
113 | return false;
114 | }
115 | });
116 |
117 | return validSitemaps.map((url) => `Sitemap: ${url}`).join("\n") + "\n";
118 | }
119 |
120 | function validateHost(host: string, logger: AstroIntegrationLogger) {
121 | const hostPattern =
122 | /^(?=.{1,253}$)(?:(?!-)[a-zA-Z0-9-]{1,63}(?<!-)\.)+[a-zA-Z]{2,63}$/;
150 | warn: () => logger.warn(`\x1b[33mWarning: ${msg}\x1b[0m`),
151 | error: () => {
152 | logger.error(`\x1b[31m${msg}\x1b[0m`);
153 | throw new Error(msg);
154 | },
155 | };
156 |
157 | formattedMsg[type]?.();
158 | }
159 |
160 | export function generate(
161 | options: RobotsOptions,
162 | siteMapHref: string,
163 | logger: AstroIntegrationLogger,
164 | ): string {
165 | if (!options.policy || options.policy.length === 0) {
166 | throwMsg("Policy configuration is required.", "error", logger);
167 | }
168 |
169 | validatePolicy(options.policy, logger);
170 |
171 | const content = [
172 | generatePolicyContent(options.policy),
173 | generateSitemapContent(options, siteMapHref, logger),
174 | generateHostContent(options, logger),
175 | ].join("\n");
176 |
177 | return content.trim();
178 | }
179 |
180 | export function logInfo(
181 | fileSize: number,
182 | logger: AstroIntegrationLogger,
183 | destDir: string,
184 | ) {
185 | const sizeLimit = 10;
186 | if (fileSize > sizeLimit) {
187 | console.log(
188 | `\n\x1b[42m\x1b[30m Generating 'robots.txt' file \x1b[39m\x1b[0m`,
189 | );
190 | const warnMsg = [
191 | `\x1b[33m(!) 'robots.txt' file size is ${fileSize} KB.`,
192 | `- Keep it under ${sizeLimit} KB for best results.\x1b[0m\n`,
193 | ];
194 | console.log(warnMsg.join("\n"));
195 | }
196 |
197 | logger.info(
198 | `\`robots.txt\` (${fileSize}KB) created at \`${path.relative(process.cwd(), destDir)}\``,
199 | );
200 | }
201 |
--------------------------------------------------------------------------------
/src/index.ts:
--------------------------------------------------------------------------------
1 | import type { AstroConfig, AstroIntegration } from "astro";
2 | import type { Policy } from "./types";
3 | import fs from "fs/promises";
4 | import { fileURLToPath } from "node:url";
5 | import { generate, logInfo } from "./core";
6 | import { measureExecutionTime, getFileSizeInKilobytes } from "./utils";
7 |
8 | export interface RobotsOptions {
9 | /**
10 | * @description
11 | * Used to specify rules that apply to one or more robots.
12 | * @default
13 | * All robots are allowed.
14 | * ```ts
15 | * policy:[
16 | * {
17 | * userAgent: "*",
18 | * allow: "/"
19 | * }
20 | * ]
21 | * ```
22 | * For more help, refer to [SYNTAX](https://yandex.com/support/webmaster/controlling-robot/robots-txt.html#recommend) by Yandex.
23 | */
24 | policy: Policy[];
25 | /**
26 | * @description
27 | * The location of a sitemap for this website.
28 | * @example
29 | * ```ts
30 | * sitemap: [
31 | * "https://example.com/sitemap.xml",
32 | * "https://www.example.com/sitemap.xml"
33 | * ]
34 | * ```
35 | * The value of the [SITEMAP](https://developers.google.com/search/docs/crawling-indexing/robots/robots_txt#sitemap)
36 | * field is case-sensitive.
37 | */
38 | sitemap?: boolean | string | string[];
39 | /**
40 | * @default null
41 | * @description
42 | * Specify the value of `Host`, some crawlers(Yandex) support and only accept domain names.
43 | * @example
44 | * ```ts
45 | * host: siteUrl.replace(/^https?:\/\/|:\d+/g, "")
46 | * ```
47 | */
48 | host?: null | string;
49 | }
50 |
51 | const defaults: RobotsOptions = {
52 | policy: [
53 | {
54 | userAgent: "*",
55 | allow: "/",
56 | },
57 | ],
58 | sitemap: true,
59 | host: null,
60 | };
61 |
62 | export default function robots(options?: RobotsOptions): AstroIntegration {
63 | let config: AstroConfig;
64 | let finalSiteMapHref: string;
65 |
66 | const filename = "robots.txt";
67 | const opts = { ...defaults, ...options };
68 |
69 | return {
70 | name: "astro-robots",
71 | hooks: {
72 | "astro:config:setup": ({ config: cfg, }) => {
73 | config = cfg;
74 | if (config.site) {
75 | finalSiteMapHref = new URL(config.base, config.site).href;
76 | }
77 | },
78 | "astro:server:setup": ({ server, logger }) => {
79 | server.middlewares.use(async (req, res, next) => {
80 | if (req.url?.startsWith(`/${filename}`)) {
81 | res.setHeader("Content-Type", "text/plain");
82 | res.setHeader("Cache-Control", "no-cache");
83 | res.end(generate(opts, finalSiteMapHref, logger));
84 | } else {
85 | next();
86 | }
87 | });
88 | },
89 | "astro:build:done": async ({ dir, logger }) => {
90 | const fileURL = new URL(filename, dir);
91 | const destDir = fileURLToPath(dir);
92 | const fileBuffer = generate(opts, finalSiteMapHref, logger);
93 |
94 | try {
95 | await fs.mkdir(destDir, { recursive: true });
96 | await fs.writeFile(fileURL, fileBuffer, "utf-8");
97 | throw "done";
98 | } catch (e) {
99 | if( e === "done") {
100 | const fileSize = getFileSizeInKilobytes(fileBuffer);
101 | logInfo(fileSize, logger, destDir);
102 | } else {
103 | throw e;
104 | }
105 | }
106 | },
107 | },
108 | };
109 | }
110 |
--------------------------------------------------------------------------------
/src/types/Policy.ts:
--------------------------------------------------------------------------------
1 | import type { UserAgent } from "./UserAgent";
2 |
3 | export interface Policy {
4 | /**
5 | * @description
6 | * Indicates the robot to which the rules listed in "robots.txt" apply.
7 | * @example
8 | * ```ts
9 | * policy:[
10 | * {
11 | * userAgent: [
12 | * 'Googlebot',
13 | * 'Applebot',
14 | * 'Baiduspider',
15 | * 'bingbot'
16 | * ],
17 | * // crawling rule(s) for above bots
18 | * }
19 | * ]
20 | * ```
21 | * Verified bots, refer to [DITIG](https://www.ditig.com/robots-txt-template#regular-template) or [Cloudflare Radar](https://radar.cloudflare.com/traffic/verified-bots).
22 | */
23 | userAgent: UserAgent | UserAgent[];
24 | /**
25 | * @description
26 | * [ At least one or more `allow` or `disallow` entries per rule ] Allows indexing site sections or individual pages.
27 | * @example
28 | * ```ts
29 | * policy:[{allow:["/"]}]
30 | * ```
31 | * Path-based URL matching, refer to [SYNTAX](https://developers.google.com/search/docs/crawling-indexing/robots/robots_txt#url-matching-based-on-path-values) via Google.
32 | */
33 | allow?: string | string[];
34 | /**
35 | * @description
36 | * [ At least one or more `disallow` or `allow` entries per rule ] Prohibits indexing site sections or individual pages.
37 | * @example
38 | * ```ts
39 | * policy:[
40 | * {
41 | * disallow:[
42 | * "/admin",
43 | * "/uploads/1989-08-21/*.jpg$"
44 | * ]
45 | * }
46 | * ]
47 | * ```
48 | * Path-based URL matching, refer to [SYNTAX](https://developers.google.com/search/docs/crawling-indexing/robots/robots_txt#url-matching-based-on-path-values) via Google.
49 | */
50 | disallow?: string | string[];
51 | /**
52 | * @description
53 | * Specifies the minimum interval (in seconds) for the search robot to wait after loading one page before starting to load another.
54 | *
55 | * @example
56 | * ```ts
57 | * policy:[{crawlDelay:5}]
58 | * ```
59 | * About the [Crawl-delay](https://yandex.com/support/webmaster/robot-workings/crawl-delay.html#crawl-delay) directive.
60 | */
61 | crawlDelay?: number;
62 | /**
63 | * @description
64 | * Indicates to the robot that the page URL contains parameters (like UTM tags) that should be ignored when indexing it.
65 | *
66 | * @example
67 | * ```shell
68 | * # for URLs like:
69 | * www.example2.com/index.php?page=1&sid=2564126ebdec301c607e5df
70 | * www.example2.com/index.php?page=1&sid=974017dcd170d6c4a5d76ae
71 | * ```
72 | *
73 | * ```js
74 | * policy:[
75 | * {
76 | * cleanParam: [
77 | * "sid /index.php",
78 | * ]
79 | * }
80 | * ]
81 | * ```
82 | * For more additional examples, see the
83 | * Yandex [SYNTAX](https://yandex.com/support/webmaster/robot-workings/clean-param.html#clean-param__additional) guide.
84 | */
85 | cleanParam?: string | string[];
86 | }
87 |
--------------------------------------------------------------------------------
/src/types/UserAgent.ts:
--------------------------------------------------------------------------------
1 | // Updates can be retrieved from: https://www.ditig.com/robots-txt-template
2 | // Last update: 2023-03-15
3 |
4 | export type SearchEngines = {
5 | // so.com chinese search engine
6 | 360: "360Spider" | "360Spider-Image" | "360Spider-Video" | "HaoSouSpider";
7 | // apple.com search engine
8 | Apple: "Applebot" | "AppleNewsBot";
9 | // baidu.com chinese search engine
10 | Baidu:
11 | | "Baiduspider"
12 | | "Baiduspider-image"
13 | | "Baiduspider-mobile"
14 | | "Baiduspider-news"
15 | | "Baiduspider-video";
16 | // bing.com international search engine
17 | Bing:
18 | | "bingbot"
19 | | "BingPreview"
20 | | "msnbot"
21 | | "msnbot-media"
22 | | "adidxbot"
23 | | "MSN";
24 | // bublup.com suggestion/search engine
25 | Bublup: "BublupBot";
26 | // cliqz.com german in-product search engine
27 | Cliqz: "Cliqzbot";
28 | // coccoc.com vietnamese search engine
29 | Coccoc: "coccoc" | "coccocbot-image" | "coccocbot-web";
30 | // daum.net korean search engine
31 | Daumoa: "Daumoa";
32 | // dazoo.fr french search engine
33 | Dazoo: "DeuSu";
34 | // duckduckgo.com international privacy search engine
35 | Duckduckgo: "DuckDuckBot" | "DuckDuckGo-Favicons-Bot";
36 | // eurip.com european search engine
37 | Eurip: "EuripBot";
38 | // exploratodo.com latin search engine
39 | Exploratodo: "Exploratodo";
40 | // findx.com european search engine
41 | Findx: "Findxbot";
42 | // goo.ne.jp japanese search engine
43 | Goo: "gooblog" | "ichiro";
44 | // google.com international search engine
45 | Google:
46 | | "Googlebot"
47 | | "Googlebot-Image"
48 | | "Googlebot-Mobile"
49 | | "Googlebot-News"
50 | | "Googlebot-Video"
51 | | "Mediapartners-Google"
52 | | "AdsBot-Google"
53 | | "AdsBot-Google-Mobile"
54 | | "AdsBot-Google-Mobile-Apps"
55 | | "Mediapartners-Google"
56 | | "Storebot-Google"
57 | | "Google-InspectionTool"
58 | | "FeedFetcher-Google";
59 | // istella.it italian search engine
60 | Istella: "istellabot";
61 | // jike.com / chinaso.com chinese search engine
62 | Jike: "JikeSpider";
63 | // lycos.com & hotbot.com international search engine
64 | Lycos: "Lycos";
65 | // mail.ru russian search engine
66 | Mail: "Mail.Ru";
67 | // mojeek.com search engine
68 | Mojeek: "MojeekBot";
69 | // orange.com international search engine
70 | Orange: "OrangeBot";
71 | // botje.nl dutch search engine
72 | Botje: "Plukkie";
73 | // qwant.com french search engine
74 | Qwant: "Qwantify";
75 | // rambler.ru russian search engine
76 | Rambler: "Rambler";
77 | // seznam.cz czech search engine
78 | Seznam: "SeznamBot";
79 | // soso.com chinese search engine
80 | Soso: "Sosospider";
81 | // yahoo.com international search engine
82 | Yahoo: "Slurp";
83 | // sogou.com chinese search engine
84 | Sogou:
85 | | "Sogou blog"
86 | | "Sogou inst spider"
87 | | "Sogou News Spider"
88 | | "Sogou Orion spider"
89 | | "Sogou spider2"
90 | | "Sogou web spider";
91 | // sputnik.ru russian search engine
92 | Sputnik: "SputnikBot";
93 | // ask.com international search engine
94 | Ask: "Teoma";
95 | // wotbox.com international search engine
96 | Wortbox: "wotbox";
97 | // yandex.com russian search engine
98 | Yandex: "Yandex" | "YandexMobileBot";
99 | // search.naver.com south korean search engine
100 | Naver: "Yeti";
101 | // yioop.com international search engine
102 | Yioop: "YioopBot";
103 | // yooz.ir iranian search engine
104 | Yooz: "yoozBot";
105 | // youdao.com chinese search engine
106 | Youdao: "YoudaoBot";
107 | };
108 |
109 | export type SocialNetwork = {
110 | // facebook.com social network
111 | Facebook: "facebookcatalog" | "facebookexternalhit" | "Facebot";
112 | // pinterest.com social network
113 | Pinterest: "Pinterest";
114 | // twitter.com social media bot
115 | Twitter: "Twitterbot";
116 | // whatsapp.com preview bot
117 | WhatsApp: "WhatsApp";
118 | // linkedin.com search engine crawler
119 | LinkedIn: "LinkedInBot";
120 | };
121 |
122 | export type SearchEngineOptimization = {
123 | Ahrefs: "AhrefsBot";
124 | Moz: "Moz dotbot" | "Moz rogerbot";
125 | WebMeUp: "BLEXBot";
126 | Botify: "Botify";
127 | Babbar: "Barkrowler";
128 | SEMrush: "SEMrush" | "SemrushBotSI";
129 | Cxense: "Cxense";
130 | EzoicInc: "EzoicBot";
131 | DataForSEO: "DataForSEO";
132 | PrerenderLLC: "prerender";
133 | };
134 |
135 | export type UserAgent =
136 | | "*"
137 | | SearchEngines[keyof SearchEngines]
138 | | SocialNetwork[keyof SocialNetwork]
139 | | SearchEngineOptimization[keyof SearchEngineOptimization];
140 |
--------------------------------------------------------------------------------
/src/types/index.ts:
--------------------------------------------------------------------------------
1 | export type { Policy } from "./Policy";
2 |
--------------------------------------------------------------------------------
/src/utils/index.ts:
--------------------------------------------------------------------------------
1 | export { getFileSizeInKilobytes } from "./size";
2 | export { measureExecutionTime } from "./time";
3 |
--------------------------------------------------------------------------------
/src/utils/size.ts:
--------------------------------------------------------------------------------
1 | import { fileURLToPath } from "node:url";
2 | import fs from "fs/promises";
3 |
4 | // export async function getFileSizeInKilobytes(fileUrl: URL): Promise {
5 | // const filePath = fileURLToPath(fileUrl);
6 |
7 | // try {
8 | // await fs.access(filePath);
9 | //     const fileBuffer = await fs.readFile(filePath); // read the entire file
10 | //     return fileBuffer.length / 1024; // convert the byte count to KB
11 | // } catch (error) {
12 | // console.error("Error reading file:", error);
13 | // return 0;
14 | // }
15 | // }
16 |
17 | export function getFileSizeInKilobytes(fileBuffer: string): number {
18 | return Buffer.byteLength(fileBuffer, "utf8") / 1024; // convert the string's UTF-8 byte length to KB
19 | }
20 |
21 |
--------------------------------------------------------------------------------
/src/utils/time.ts:
--------------------------------------------------------------------------------
1 | import { performance } from "perf_hooks";
2 |
3 | export function measureExecutionTime(callback: () => void): number {
4 | const startTime = performance.now();
5 | callback();
6 | const endTime = performance.now();
7 | const executionTime = Math.floor(endTime - startTime);
8 | return executionTime;
9 | }
10 |
--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | "extends": "astro/tsconfigs/strict"
3 | }
4 |
--------------------------------------------------------------------------------