├── .gitignore
├── package.json
├── LICENSE
├── index.js
├── generate.py
├── .github
└── workflows
│ ├── push.yml
│ └── BotLog.yml
└── README.md
/.gitignore:
--------------------------------------------------------------------------------
1 | urls.txt
2 | bing.json
3 | service_account.json
4 | package-lock.json
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "bulk-indexing-api",
3 | "version": "1.0.0",
4 | "description": "",
5 | "main": "index.js",
6 | "scripts": {
7 | "test": "echo \"Error: no test specified\" && exit 1"
8 | },
9 | "author": "",
10 | "license": "MIT",
11 | "dependencies": {
12 | "fs": "0.0.1-security",
13 | "googleapis": "^46.0.0",
14 | "request": "^2.88.0"
15 | }
16 | }
17 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2021 MHuiG
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/index.js:
--------------------------------------------------------------------------------
const fs = require('fs');
const request = require('request');
const { google } = require('googleapis');
const key = require('./service_account.json');

/*
 * Submits every URL listed in urls.txt to the Google Indexing API as one
 * multipart batch request, authenticated with the service account whose
 * credentials are stored in service_account.json.
 */

// JWT client for the service account, restricted to the Indexing API scope.
const jwtClient = new google.auth.JWT(
  key.client_email,
  null,
  key.private_key,
  ['https://www.googleapis.com/auth/indexing'],
  null
);

/**
 * Parses the contents of a URL list that holds one URL per line.
 *
 * Blank lines are dropped: the list is written with a trailing newline, and
 * a naive split would otherwise yield an empty URL that is sent to the API.
 *
 * @param {string} text - Raw contents of the URL list.
 * @returns {string[]} Trimmed, non-empty lines in their original order.
 */
function parseUrlList(text) {
  return text
    .split(/\r?\n/)
    .map(line => line.trim())
    .filter(line => line !== '');
}

/**
 * Builds one part of the multipart batch body: an embedded HTTP request that
 * publishes a URL_UPDATED notification for the given URL.
 *
 * @param {string} url - URL to notify the Indexing API about.
 * @returns {Object} Part descriptor understood by the `request` library.
 */
function toBatchItem(url) {
  return {
    'Content-Type': 'application/http',
    'Content-ID': '',
    body:
      'POST /v3/urlNotifications:publish HTTP/1.1\n' +
      'Content-Type: application/json\n\n' +
      JSON.stringify({
        url: url,
        type: 'URL_UPDATED'
      })
  };
}

const batch = parseUrlList(fs.readFileSync('urls.txt').toString());

if (batch.length === 0) {
  // Nothing to submit; avoid authorizing and posting an empty batch.
  console.log('urls.txt contains no URLs; nothing to submit.');
} else {
  jwtClient.authorize((err, tokens) => {
    if (err) {
      console.error(err);
      return;
    }

    const options = {
      url: 'https://indexing.googleapis.com/batch',
      method: 'POST',
      headers: {
        'Content-Type': 'multipart/mixed'
      },
      auth: { bearer: tokens.access_token },
      multipart: batch.map(toBatchItem)
    };

    request(options, (requestErr, resp, body) => {
      if (requestErr) {
        // Transport-level failure: there is no response body to print.
        console.error(requestErr);
        return;
      }
      console.log(body);
    });
  });
}
52 |
--------------------------------------------------------------------------------
/generate.py:
--------------------------------------------------------------------------------
"""Collect page URLs from the site's sitemaps and write the submission lists.

Outputs:
  * urls.txt  - one URL per line (used for the Google and Baidu submissions)
  * bing.json - {"siteUrl": ..., "urlList": [...]} payload for Bing
"""
import re
import json
import urllib
import urllib.request
import random

site = 'https://blog.mhuig.top'
sitemaps = ['/post-sitemap.xml', '/page-sitemap.xml']

headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20100101 Firefox/23.0'}

# Text between <loc> and </loc>, i.e. the URL of each sitemap entry.
LOC_PATTERN = re.compile(r'(?<=<loc>).*?(?=</loc>)')

# Bing: first 5 URLs plus 5 random ones.
BING_HEAD_COUNT = 5
BING_RANDOM_COUNT = 5
# Google / Baidu: first 50 URLs plus 50 random ones.
GOOGLE_HEAD_COUNT = 50
GOOGLE_RANDOM_COUNT = 50


def extract_locs(xml_text):
    """Return the contents of every <loc> element in `xml_text`, in order."""
    return LOC_PATTERN.findall(xml_text)


def select_urls(urls, head_count, random_count, pool):
    """Return the first `head_count` URLs plus a random sample from `pool`.

    At most `random_count` entries are sampled; when `pool` is smaller than
    that, the whole pool is used instead of raising ValueError.
    """
    head = list(urls[:head_count])
    extra = random.sample(list(pool), min(random_count, len(pool)))
    return head + extra


def fetch_sitemap_urls():
    """Download every configured sitemap and return all <loc> URLs found."""
    found = []
    for sitemap in sitemaps:
        req = urllib.request.Request(url=site + sitemap, headers=headers)
        # Context manager closes the HTTP response once it has been read.
        with urllib.request.urlopen(req) as response:
            html = response.read().decode('utf-8')
        found += extract_locs(html)
    return found


def main():
    """Fetch the sitemaps, pick the URLs to submit and write both output files."""
    # The original script discards the first entry found.
    # NOTE(review): presumably it is not a page that should be submitted - confirm.
    result = fetch_sitemap_urls()[1:]

    # Random picks come from the URLs after the first GOOGLE_HEAD_COUNT, so
    # they never duplicate a URL that is already in a head selection.
    remaining = result[GOOGLE_HEAD_COUNT:]

    bingUrllist = select_urls(result, BING_HEAD_COUNT, BING_RANDOM_COUNT, remaining)
    googleUrllist = select_urls(result, GOOGLE_HEAD_COUNT, GOOGLE_RANDOM_COUNT, remaining)

    with open('urls.txt', 'w', encoding='utf-8') as file:
        for data in googleUrllist:
            print(data, file=file)

    bingData = {"siteUrl": site, "urlList": bingUrllist}
    with open("bing.json", "w", encoding='utf-8') as f:
        json.dump(bingData, f)

    # Disabled dump of the URLs left over after the head selection:
    # with open('all-urls.txt', 'w') as file:
    #     for data in remaining:
    #         print(data, file=file)


if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------
/.github/workflows/push.yml:
--------------------------------------------------------------------------------
# Extracts URLs from the sitemaps and submits them to Bing, Google and Baidu.
name: push

on:
  push:
  schedule:
    # Scheduled workflows use UTC: every day at 07:00 UTC.
    - cron: '0 7 * * *'
  watch:
    # Fires when the repository is starred.
    types: [started]
  workflow_dispatch:
jobs:
  build:
    # Only run in the upstream repository.
    if: github.repository == 'MHG-LAB/submit-urls-from-sitemap-to-search-engine'
    runs-on: ubuntu-latest
    steps:
      # Actions are pinned to release tags instead of moving branches.
      - name: Checkout
        uses: actions/checkout@v4
      - name: Set up python
        uses: actions/setup-python@v5
        with:
          # Quoted so YAML keeps it a string rather than a number.
          python-version: '3.13'
      - name: Setup Node.js 10.x
        uses: actions/setup-node@v4
        with:
          node-version: "10.x"
      - name: Npm install
        run: npm install
      # NOTE(review): generate.py only imports standard-library modules, so
      # this step looks unnecessary - confirm before removing.
      - name: Install requests
        run: pip install requests
      # Writes urls.txt and bing.json, which the steps below upload.
      - name: generate
        run: python generate.py

      - name: Push to Bing
        env:
          BING_TOKEN: ${{ secrets.BINGTOKEN }}
        run: curl -H "Content-Type:application/json" --data-binary @bing.json "https://ssl.bing.com/webmaster/api.svc/json/SubmitUrlbatch?apikey=${BING_TOKEN}"

      - name: Push to Google
        env:
          GOOGLE_SERVICE_ACCOUNT: ${{ secrets.GOOGLE_SERVICE_ACCOUNT }}
        run: |
          echo "$GOOGLE_SERVICE_ACCOUNT" > ./service_account.json
          node index

      - name: Push to BaiDu
        env:
          BAIDU_TOKEN: ${{ secrets.BAIDUTOKEN }}
          SITE: ${{ secrets.SITE }}
        run: curl -H 'Content-Type:text/plain' --data-binary @urls.txt "http://data.zz.baidu.com/urls?site=${SITE}&token=${BAIDU_TOKEN}"
49 |
--------------------------------------------------------------------------------
/.github/workflows/BotLog.yml:
--------------------------------------------------------------------------------
# Runs the Workflow-Keep-Alive script and publishes a one-line log entry to
# the `botlog` branch.
name: Bot Log
on:
  schedule:
    # Scheduled workflows use UTC: every day at 20:00 UTC.
    - cron: '0 20 * * *'
  push:
  workflow_dispatch:
jobs:
  log:
    # Only run in the upstream repository.
    if: github.repository == 'MHG-LAB/submit-urls-from-sitemap-to-search-engine'
    # Lower-case label, matching push.yml.
    runs-on: ubuntu-latest
    env:
      # Timestamps written by `date` below use this zone.
      TZ: Asia/Shanghai
    steps:
      # Installs the bot's SSH key so the clone step can use git@github.com.
      - name: Setup Deploy Private Key
        env:
          BOT_DEPLOY_KEY: ${{ secrets.BOT_DEPLOY_KEY }}
          BOT_NAME: ${{ secrets.BOT_NAME }}
          BOT_EMAIL: ${{ secrets.BOT_EMAIL }}
        run: |
          mkdir -p ~/.ssh/
          echo "$BOT_DEPLOY_KEY" > ~/.ssh/id_rsa
          chmod 600 ~/.ssh/id_rsa
          ssh-keyscan github.com >> ~/.ssh/known_hosts
          git config --global user.email "$BOT_EMAIL"
          git config --global user.name "$BOT_NAME"
      - name: Setup Python 3
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'
          architecture: 'x64'
      - name: Clone
        run: |
          git clone --recursive git@github.com:MHG-LAB/Workflow-Keep-Alive
      - name: Build
        env:
          BOT_GITHUB_TOKEN: ${{ secrets.BOT_GITHUB_TOKEN }}
        run: |
          cd Workflow-Keep-Alive
          # Substitute the token placeholder in main.py before running it.
          sed -i "s/BOT_GITHUB_TOKEN/$BOT_GITHUB_TOKEN/g" ./main.py
          python main.py
      - name: Bot Log
        run: |
          # -p keeps the step from failing if the directory already exists.
          mkdir -p botlog
          cd botlog
          Date=$(date -d '+0 hours' +'%Y-%m-%d %H:%M:%S')
          echo "[$Date] Successful" >> readme.md
      - name: Deploy
        uses: peaceiris/actions-gh-pages@v3
        with:
          deploy_key: ${{ secrets.BOT_DEPLOY_KEY }}
          user_name: ${{ secrets.BOT_NAME }}
          user_email: ${{ secrets.BOT_EMAIL }}
          publish_dir: ./botlog
          publish_branch: botlog
55 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # submit-urls-from-sitemap-to-search-engine
2 |
3 | ## 它可以干嘛
4 |
5 | 提取 sitemap 中的链接,利用百度、必应、谷歌 API **自动** 推送至搜索引擎,提升网站收录速度。
6 |
7 | ## 它要怎么用
8 |
9 | 请不要 fork 此仓库!! 请使用模板导入 [Use this template](https://github.com/MHG-LAB/submit-urls-from-sitemap-to-search-engine/generate) !!(瞎点 fork 按钮发送垃圾 PR 将直接提交到 GitHub 黑名单中)
10 |
11 | 将 `generate.py` 文件中 `site` 的值修改为你的博客地址, `sitemaps` 变量的值修改为你的 sitemap.xml 地址,请确保你的 sitemap 为正常格式。
12 |
13 | ```py
14 | site = 'https://blog.xxx.cn'
15 | sitemaps = ['/sitemap1.xml','/sitemap2.xml']
16 | ```
17 |
18 | ### 百度
19 |
20 | 先前往百度资源搜索平台获取 `token`,就是 API 提交中,接口调用地址 `http://data.zz.baidu.com/urls?site=xxx&token=xxx`,`token=` 之后的那一串。
21 |
22 | 使用模板创建自己的仓库后,进入 `Settings > Secrets > New secret`,`Name` 中填写 `BAIDUTOKEN`,`Value` 即刚刚获取的 token(放入 Secrets 中能防止 token 泄露)。再新建一个 secret,`Name` 为 `SITE`,`Value` 为你的博客地址,需要协议头,结尾不能有 `/`。
23 |
24 | 好了,大功告成,接下来每天 GitHub 便会自动帮你推送链接至百度。
25 |
26 | #### 配额
27 |
28 | 每天前 50 个 URL + 随机 50 个 URL
29 |
30 | ### 必应
31 |
32 | 前往 [Bing Webmaster Tools](https://www.bing.com/webmasters),`设置 -> API 访问 -> API 密钥 -> 新建`
33 |
34 | `Settings > Secrets > New secret`,`Name` 中填写 `BINGTOKEN`,`Value` 填入刚刚新建的密钥
35 |
36 | #### 配额
37 |
38 | 每天前 5 个 URL + 随机 5 个 URL
39 |
40 | ### 谷歌
41 |
42 | 首先,您需要在 Google Cloud Platform 中设置对 Indexing API 的访问权限 - 按照以下说明进行操作。
43 |
44 | https://developers.google.com/search/apis/indexing-api/v3/prereqs
45 |
46 | 一旦您有权访问索引 API,您就可以下载公钥/私钥对 JSON 文件,其中包含您的所有凭据,并应保存为“service_account.json”。
47 |
48 | `Settings > Secrets > New secret`,`Name` 中填写 `GOOGLE_SERVICE_ACCOUNT`,`Value` 填入刚刚下载的 `service_account.json` 文件的全部内容
49 |
50 | #### 在 Search Console 中验证网站所有权以提交网址以编制索引
51 |
52 | 在此步骤中,您将验证您是否可以控制您的网络资产。
53 |
54 | 要验证您网站的所有权,您需要添加您的服务帐户电子邮件地址(请参阅 service_account.json - client_email)并将其添加为 Search Console 中网络媒体资源的所有者(“委托”)。
55 |
56 | 您可以在两个地方找到您的服务帐号电子邮件地址:
57 |
58 | - 您在创建项目时下载的 JSON 私钥中的 client_email 字段。
59 | - 开发者控制台中服务帐户视图的服务帐户 ID 列。
60 | - 电子邮件地址的格式类似于以下内容:
61 | 例如,“ my-service-account@test-project-42.google.com.iam.gserviceaccount.com ”。
62 |
63 | 然后...
64 |
65 | - 1.转到Google 网站管理员中心
66 |
67 | - 2.点击您经过验证的资源
68 |
69 | - 3.向下滚动并单击“添加所有者”。
70 |
71 | - 4.将您的服务帐号电子邮件地址作为资源的所有者添加到该资源中。
72 |
73 | #### 配额
74 |
75 | 每天前 50 个 URL + 随机 50 个 URL
76 |
77 | ---
78 |
79 | Enjoy it!
80 |
81 | ---
82 |
--------------------------------------------------------------------------------