├── .github └── workflows │ ├── BotLog.yml │ └── push.yml ├── .gitignore ├── LICENSE ├── README.md ├── generate.py ├── index.js └── package.json /.github/workflows/BotLog.yml: -------------------------------------------------------------------------------- 1 | name: Bot Log 2 | on: 3 | schedule: 4 | - cron: '0 20 * * *' 5 | push: 6 | workflow_dispatch: 7 | jobs: 8 | log: 9 | if: github.repository == 'MHG-LAB/submit-urls-from-sitemap-to-search-engine' 10 | runs-on: Ubuntu-latest 11 | env: 12 | TZ: Asia/Shanghai 13 | steps: 14 | - name: Setup Deploy Private Key 15 | env: 16 | BOT_DEPLOY_KEY: ${{ secrets.BOT_DEPLOY_KEY }} 17 | BOT_NAME: ${{ secrets.BOT_NAME }} 18 | BOT_EMAIL: ${{ secrets.BOT_EMAIL }} 19 | run: | 20 | mkdir -p ~/.ssh/ 21 | echo "$BOT_DEPLOY_KEY" > ~/.ssh/id_rsa 22 | chmod 600 ~/.ssh/id_rsa 23 | ssh-keyscan github.com >> ~/.ssh/known_hosts 24 | git config --global user.email "$BOT_EMAIL" 25 | git config --global user.name "$BOT_NAME" 26 | - name: Setup Python 3 27 | uses: actions/setup-python@v5 28 | with: 29 | python-version: '3.10' 30 | architecture: 'x64' 31 | - name: Clone 32 | run: | 33 | git clone --recursive git@github.com:MHG-LAB/Workflow-Keep-Alive 34 | - name: Build 35 | env: 36 | BOT_GITHUB_TOKEN: ${{ secrets.BOT_GITHUB_TOKEN }} 37 | run: | 38 | cd Workflow-Keep-Alive 39 | sed -i "s/BOT_GITHUB_TOKEN/$BOT_GITHUB_TOKEN/g" ./main.py 40 | python main.py 41 | - name: Bot Log 42 | run: | 43 | mkdir botlog 44 | cd botlog 45 | Date=`date -d '+0 hours' +'%Y-%m-%d %H:%M:%S'` 46 | echo "[$Date] Successful" >> readme.md 47 | - name: Deploy 48 | uses: peaceiris/actions-gh-pages@v3 49 | with: 50 | deploy_key: ${{ secrets.BOT_DEPLOY_KEY }} 51 | user_name: ${{ secrets.BOT_NAME }} 52 | user_email: ${{ secrets.BOT_EMAIL }} 53 | publish_dir: ./botlog 54 | publish_branch: botlog 55 | -------------------------------------------------------------------------------- /.github/workflows/push.yml: 
-------------------------------------------------------------------------------- 1 | name: push 2 | 3 | on: 4 | push: 5 | schedule: 6 | - cron: '0 7 * * *' 7 | watch: 8 | types: [started] 9 | workflow_dispatch: 10 | jobs: 11 | build: 12 | if: github.repository == 'MHG-LAB/submit-urls-from-sitemap-to-search-engine' 13 | runs-on: ubuntu-latest 14 | steps: 15 | - name: Checkout 16 | uses: actions/checkout@master 17 | - name: Set up python 18 | uses: actions/setup-python@v1 19 | with: 20 | python-version: 3.13 21 | - name: Setup Node.js 10.x 22 | uses: actions/setup-node@master 23 | with: 24 | node-version: "10.x" 25 | - name: Npm install 26 | run: npm install 27 | - name: Install requests 28 | run: pip install requests 29 | - name: generate 30 | run: python generate.py 31 | 32 | - name: Push to Bing 33 | env: 34 | BING_TOKEN: ${{ secrets.BINGTOKEN }} 35 | run: curl -H "Content-Type:application/json" --data-binary @bing.json "https://ssl.bing.com/webmaster/api.svc/json/SubmitUrlbatch?apikey=${BING_TOKEN}" 36 | 37 | - name: Push to Google 38 | env: 39 | GOOGLE_SERVICE_ACCOUNT: ${{ secrets.GOOGLE_SERVICE_ACCOUNT }} 40 | run: | 41 | echo "$GOOGLE_SERVICE_ACCOUNT" > ./service_account.json 42 | node index 43 | 44 | - name: Push to BaiDu 45 | env: 46 | BAIDU_TOKEN: ${{ secrets.BAIDUTOKEN }} 47 | SITE: ${{ secrets.SITE }} 48 | run: curl -H 'Content-Type:text/plain' --data-binary @urls.txt "http://data.zz.baidu.com/urls?site=${SITE}&token=${BAIDU_TOKEN}" 49 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | urls.txt 2 | bing.json 3 | service_account.json 4 | package-lock.json -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 MHuiG 4 | 5 | Permission is hereby granted, free of charge, to 
any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # submit-urls-from-sitemap-to-search-engine 2 | 3 | ## 它可以干嘛 4 | 5 | 提取 sitemap 中的链接,利用百度、必应、谷歌 API **自动** 推送至搜索引擎,提升网站收录速度。 6 | 7 | ## 它要怎么用 8 | 9 | 请不要 fork 此仓库!! 使用模板导入 [Use this template](https://github.com/MHG-LAB/submit-urls-from-sitemap-to-search-engine/generate) !! 
(瞎点fork按钮发送垃圾 PR 将直接提交到 GitHub 黑名单中) 10 | 11 | 将 `generate.py` 文件中 `site` 的值修改为你的博客地址, `sitemaps` 变量的值修改为你的 sitemap.xml 地址,请确保你的 sitemap 为正常格式。 12 | 13 | ```py 14 | site = 'https://blog.xxx.cn' 15 | sitemaps = ['/sitemap1.xml','/sitemap2.xml'] 16 | ``` 17 | 18 | ### 百度 19 | 20 | 先前往百度搜索资源平台获取 `token`,就是 API 提交中,接口调用地址 `http://data.zz.baidu.com/urls?site=xxx&token=xxx`,`token=` 之后的那一串。 21 | 22 | 使用模板创建你自己的仓库后,进入 `Settings > Secrets > New secret`,`Name` 中填写 `BAIDUTOKEN`,`Value` 即刚刚获取的 token(放入 Secrets 中能防止 token 泄露)。再新建一个 secret,`Name` 为 `SITE`,`Value` 为你的博客地址,需要协议头,结尾不能有 `/` 23 | 24 | 好了,大功告成,接下来每天 GitHub 便会自动帮你推送链接至百度。 25 | 26 | #### 配额 27 | 28 | 每天前 50 个 URL + 随机 50 个 URL 29 | 30 | ### 必应 31 | 32 | 前往 [Bing Webmaster Tools](https://www.bing.com/webmasters),`设置 -> API 访问 -> API 密钥 -> 新建` 33 | 34 | `Settings > Secrets > New secret`,`Name` 中填写 `BINGTOKEN`,`Value` 填入刚刚新建的密钥 35 | 36 | #### 配额 37 | 38 | 每天前 5 个 URL + 随机 5 个 URL 39 | 40 | ### 谷歌 41 | 42 | 首先,您需要在 Google Cloud Platform 中设置对 Indexing API 的访问权限 - 按照以下说明进行操作。 43 | 44 | https://developers.google.com/search/apis/indexing-api/v3/prereqs 45 | 46 | 一旦您有权访问索引 API,您就可以下载公钥/私钥对 JSON 文件,其中包含您的所有凭据,并应保存为“service_account.json”。 47 | 48 | `Settings > Secrets > New secret`,`Name` 中填写 `GOOGLE_SERVICE_ACCOUNT`,`Value` 填入刚刚下载的 `service_account.json` 文件的全部内容 49 | 50 | #### 在 Search Console 中验证网站所有权以提交网址以编制索引 51 | 52 | 在此步骤中,您将验证您是否可以控制您的网络资产。 53 | 54 | 要验证您网站的所有权,您需要添加您的服务帐户电子邮件地址(请参阅 service_account.json - client_email)并将其添加为 Search Console 中网络媒体资源的所有者(“委托”)。 55 | 56 | 您可以在两个地方找到您的服务帐号电子邮件地址: 57 | 58 | - 您在创建项目时下载的 JSON 私钥中的 client_email 字段。 59 | - 开发者控制台中服务帐户视图的服务帐户 ID 列。 60 | - 电子邮件地址的格式类似于以下内容: 61 | 例如,“ my-service-account@test-project-42.google.com.iam.gserviceaccount.com ”。 62 | 63 | 然后... 64 | 65 | - 1.转到Google 网站管理员中心 66 | 67 | - 2.点击您经过验证的资源 68 | 69 | - 3.向下滚动并单击“添加所有者”。 70 | 71 | - 4.将您的服务帐号电子邮件地址作为资源的所有者添加到该资源中。 72 | 73 | #### 配额 74 | 75 | 每天前 50 个 URL + 随机 50 个 URL 76 | 77 | --- 78 | 79 | Enjoy it! 
80 | 81 | --- 82 | -------------------------------------------------------------------------------- /generate.py: -------------------------------------------------------------------------------- 1 | import re 2 | import json 3 | import urllib 4 | import urllib.request 5 | import random 6 | 7 | site = 'https://blog.mhuig.top' 8 | sitemaps = ['/post-sitemap.xml','/page-sitemap.xml'] 9 | 10 | result = [] 11 | bingData = {} 12 | i=0 13 | 14 | headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20100101 Firefox/23.0'} 15 | 16 | for sitemap in sitemaps: 17 | sitemap = site+sitemap 18 | req = urllib.request.Request(url=sitemap, headers=headers) 19 | html = urllib.request.urlopen(req).read().decode('utf-8') 20 | data = re.findall(re.compile(r'(?<=).*?(?=)'), html) 21 | result=result+data 22 | 23 | 24 | del result[0] 25 | 26 | 27 | bingUrllist=[] 28 | googleUrllist=[] 29 | 30 | for data in result: 31 | i=i+1 32 | result.remove(data) 33 | # bing 提交前5条 34 | if i <= 5: 35 | bingUrllist.append(data) 36 | # baidu google 提交前50条 37 | googleUrllist.append(data) 38 | if i == 50: 39 | break 40 | 41 | # bing 提交随机5条 42 | bingUrllist= bingUrllist + random.sample(result,5) 43 | # baidu google 提交随机50条 44 | googleUrllist=googleUrllist + random.sample(result,50) 45 | 46 | with open('urls.txt', 'w') as file: 47 | for data in googleUrllist: 48 | print(data, file=file) 49 | 50 | 51 | bingData["siteUrl"] = site 52 | bingData["urlList"] = bingUrllist 53 | with open("bing.json", "w") as f: 54 | json.dump(bingData,f) 55 | 56 | # with open('all-urls.txt', 'w') as file: 57 | # for data in result: 58 | # print(data, file=file) -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | const fs = require('fs'); 2 | var request = require('request'); 3 | var { google } = require('googleapis'); 4 | var key = require('./service_account.json'); 5 | 6 | const 
jwtClient = new google.auth.JWT( 7 | key.client_email, 8 | null, 9 | key.private_key, 10 | ['https://www.googleapis.com/auth/indexing'], 11 | null 12 | ); 13 | 14 | const batch = fs 15 | .readFileSync('urls.txt') 16 | .toString() 17 | .split('\n'); 18 | 19 | jwtClient.authorize(function(err, tokens) { 20 | if (err) { 21 | console.log(err); 22 | return; 23 | } 24 | 25 | const items = batch.map(line => { 26 | return { 27 | 'Content-Type': 'application/http', 28 | 'Content-ID': '', 29 | body: 30 | 'POST /v3/urlNotifications:publish HTTP/1.1\n' + 31 | 'Content-Type: application/json\n\n' + 32 | JSON.stringify({ 33 | url: line, 34 | type: 'URL_UPDATED' 35 | }) 36 | }; 37 | }); 38 | 39 | const options = { 40 | url: 'https://indexing.googleapis.com/batch', 41 | method: 'POST', 42 | headers: { 43 | 'Content-Type': 'multipart/mixed' 44 | }, 45 | auth: { bearer: tokens.access_token }, 46 | multipart: items 47 | }; 48 | request(options, (err, resp, body) => { 49 | console.log(body); 50 | }); 51 | }); 52 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "bulk-indexing-api", 3 | "version": "1.0.0", 4 | "description": "", 5 | "main": "index.js", 6 | "scripts": { 7 | "test": "echo \"Error: no test specified\" && exit 1" 8 | }, 9 | "author": "", 10 | "license": "ISC", 11 | "dependencies": { 12 | "fs": "0.0.1-security", 13 | "googleapis": "^46.0.0", 14 | "request": "^2.88.0" 15 | } 16 | } 17 | --------------------------------------------------------------------------------