├── .env
├── old
├── progress.py
├── Makefile
├── settings.py
├── spider_test.py
├── getImg.py
├── README.md
└── spider.py
├── .gitignore
├── src
├── my.py
├── download_img.py
├── util
│ ├── table.py
│ ├── config.py
│ ├── mysql_helper.py
│ └── Bmob.py
├── files
│ ├── unicodeEmoji.html
│ ├── unicodeDesc.html
│ ├── imgEmoji.html
│ └── unicode.json
├── emoji.py
└── main.py
├── README.md
├── .vscode
└── setting.json
└── docs
├── index.md
└── index.html
/.env:
--------------------------------------------------------------------------------
1 | PYTHONPATH=./src
--------------------------------------------------------------------------------
/old/progress.py:
--------------------------------------------------------------------------------
# Presumably the ids of users that have already been crawled — confirm against old/spider.py.
crawled = []
# Ids of users to crawl; old/README.md says to put the seed user id in this list.
crawling = [1669879400]
3 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__
2 | weibo
3 | profile.txt
4 | .idea
5 | *.log
6 | imgs
--------------------------------------------------------------------------------
/old/Makefile:
--------------------------------------------------------------------------------
1 | all:
2 | python spider.py
3 | clean:
4 | rm -r log/*
5 | rm -r weibo/*
--------------------------------------------------------------------------------
/src/my.py:
--------------------------------------------------------------------------------
1 | from util.mysql_helper import *
2 |
3 | create_db_if_not_exists()
4 | create_table_if_not_exists()
5 |
--------------------------------------------------------------------------------
/src/download_img.py:
--------------------------------------------------------------------------------
1 | from emoji import *
2 | import requests
3 |
import os

# Alt texts and image URLs parsed from files/imgEmoji.html by emoji.get_img_emojis().
(img_desc_arr, img_link_arr) = get_img_emojis()

# Directory containing this script (with trailing separator). The original
# passed the string literal '__file__', which resolved against the current
# working directory instead of the script location.
CUR_DIR = os.path.dirname(os.path.realpath(__file__)) + os.sep
IMG_DIR = os.path.join(CUR_DIR, 'imgs')

print(img_desc_arr)


def download_pic(name, url):
    """
    Download ``url`` and store it as ``imgs/<name>.png`` next to this script.

    Raises ``requests.HTTPError`` for a non-2xx response, so an error page is
    never written to disk as if it were an image.
    """
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    # The target directory is git-ignored and may not exist on a fresh checkout.
    os.makedirs(IMG_DIR, exist_ok=True)
    with open(os.path.join(IMG_DIR, name + '.png'), "wb") as f:
        f.write(response.content)


# Files are named by position (img_0, img_1, ...), one per image link.
for i, link in enumerate(img_link_arr):
    download_pic(f'img_{i}', link)
--------------------------------------------------------------------------------
/src/util/table.py:
--------------------------------------------------------------------------------
1 | from .Bmob import BmobSDK, BmobModel
2 |
class Emoji(BmobModel):
    """Bmob model with an emoji description and its content."""
    # Class-level values define the fields; BmobModel.get_dict() uses their
    # types to coerce the instance values before upload.
    desc = ''
    content = ''
6 |
class Crawling(BmobModel):
    """Bmob model with a user id and user name; presumably a user queued for crawling — confirm with callers."""
    uid = 0
    uname = ''
10 |
class Crawled(BmobModel):
    """Bmob model with a user id and user name; presumably a user already crawled — confirm with callers."""
    uid = 0
    uname = ''
14 |
class Weibo(BmobModel):
    """Bmob model for a post: its mid, text and a list of image emoji."""
    mid = ''
    text = ''
    # The list acts as a type template for BmobModel.get_dict(); instances set their own value.
    img_emoji = []
19 |
class Comment(BmobModel):
    """Bmob model for a comment: its cid, a mid, text and a list of image emoji."""
    cid = ''
    # presumably the mid of the post the comment belongs to — confirm with callers
    mid = ''
    text = ''
    # The list acts as a type template for BmobModel.get_dict(); instances set their own value.
    img_emoji = []
25 |
--------------------------------------------------------------------------------
/old/settings.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
# Request headers used when accessing m.weibo.cn
DEFAULT_REQUEST_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:61.0) Gecko/20100101 Firefox/61.0'
}

# info for https://passport.weibo.cn/signin/login
USER_PASSWORD = [
    {
        'user': 'xxxxx',
        'password': 'xxxxx'
    }
]

# Path to the Chrome driver; download it from http://chromedriver.chromium.org/downloads
CHROME_DRIVER_PATH = "D:\\software\\chromedriver.exe"

# Number of requests; each request returns roughly 10 items, depending on the specific API
WAITING_FOR_REQUESTS = 1
# Number of seconds to pause
DELAY = 2
# Maximum number of users that may be crawled at the same time
MAX_CRAWING_USERS = 1
25 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Weibo API
2 |
3 | 广度优先搜索按照 following 关系爬取所有微博、评论和表情,利用 MySQL 存储爬取到的微博信息,可以在 `config.py` 中进行配置。
4 |
5 | 
6 |
7 | ## 使用
8 |
9 | 1. 安装 MySQL,无需操心建表过程,程序自动完成
10 | 2. 前往配置 [config.py](src/util/config.py) 填充配置信息
11 | 3. 运行 `python src/main.py` 开始爬取
12 |
13 | ## 参考资料
14 |
15 | 1. [Request sessions](https://2.python-requests.org//en/latest/user/advanced/#session-objects)
16 | 2. [一行代码将 cookie 字符串转换成字典对象](https://foofish.net/extract_cookie.html)
17 | 3. [如何在requests session中手动设置cookie](https://blog.csdn.net/mgxcool/article/details/52663382)
18 | 4. [Python + MySQL 编码问题](https://stackoverflow.com/a/20349552/8242705)
19 |
--------------------------------------------------------------------------------
/.vscode/setting.json:
--------------------------------------------------------------------------------
1 | // Place your settings in this file to overwrite default and user settings.
2 |
3 | {
4 | "files.exclude": {
5 | "**/.git": true, // this is a default value
6 | "**/.DS_Store": true, // this is a default value
7 |
8 | "**/node_modules": true, // this excludes all folders
9 | // named "node_modules" from
10 | // the explore tree
11 |
12 | // alternative version
13 | "node_modules": true, // this excludes the folder
14 | // only from the root of
15 | // your workspace
16 | "weibo": true,
17 | "log": true
18 | }
19 | }
--------------------------------------------------------------------------------
/old/spider_test.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from spider import WBSpider
3 |
# Usage example: given a user id, all information is stored on the wb instance
user_id = 1669879400  # may be changed to any valid user id (except the crawler account's own Weibo id)
filter = 0  # 0 crawls all posts (original + reposts); 1 crawls original posts only
pic_download = 0  # 0 does not download the original images; 1 downloads them
wb = WBSpider(user_id, filter, pic_download)  # create the spider instance wb
wb.start()  # crawl the Weibo data
10 |
class TestStringMethods(unittest.TestCase):
    """Smoke test that runs against the module-level ``wb`` spider instance."""

    # def test_get_user_info(self):
    #     wb.get_user_info()
    #     assert wb.userInfo["screen_name"] == 'Dear-迪丽热巴'
    #     assert wb.weobo_containerid == "1076031669879400"
    def test_start(self):
        # wb.start() has already run once at import time; total_pages is read from that run.
        assert wb.total_pages > 0
        wb.start()
20 |
# Run the test case when this file is executed directly.
if __name__ == '__main__':
    unittest.main()
--------------------------------------------------------------------------------
/old/getImg.py:
--------------------------------------------------------------------------------
1 | import re
2 | import os,stat
3 | import urllib.request
4 | filename = "test.txt"
5 | file = open(filename,encoding='UTF-8')
6 | text = file.readlines()
7 | img = {}
8 | for line in text:
9 | pattern = '
]*/>'
10 | result = re.findall(pattern, line)
11 | for r in result:
12 | pattern_1 = 'alt=.*?.]'
13 | p1 = re.findall(pattern_1, r)
14 | x = p1[0].split("[")
15 | xx = x[1].replace("]", "")
16 | # print(xx)
17 | pattern_2 = 'h5.sinaimg.*?\.png'
18 | p2 = re.findall(pattern_2, r)
19 | # print(p2[0])
20 | img[xx] = p2[0]
21 |
22 | print(img)
23 | url_list = "http://h5.sinaimg.cn/m/emoticon/icon/default/d_tu-b5c18d9140.png"
24 | save_path = 'weibo\\Images\\1.png'
25 |
26 |
27 | for name in img.keys():
28 | save_path = 'weibo\\Images\\' + str(name) + '.png'
29 | url_list = img[name]
30 | pic_file = urllib.request.urlopen("http://" + url_list).read()
31 | f = open(save_path, "wb")
32 | f.write(pic_file)
33 | f.close()
--------------------------------------------------------------------------------
/old/README.md:
--------------------------------------------------------------------------------
1 | # weiboAPI
2 |
3 | m.weibo.cn API 收集,利用这些 API 可以轻松获取用户微博、评论、转发信息。
4 |
5 | ## 用法
6 |
7 | 1. 首先前往 http://chromedriver.chromium.org/downloads 下载 Chrome driver。如果没有安装 Chrome 的话也需要安装上对应的版本。
8 | 2. 前往 [`progress.py`](progress.py) 配置开始爬取的种子用户,在 `crawling` 数组中加入一个用户 id。用户 id 获取的方法可以参考[这里](https://github.com/dataabc/weiboSpider/blob/b4172051adeba8d2f699c3209bafda63d8a12b1d/README.md#%E5%A6%82%E4%BD%95%E8%8E%B7%E5%8F%96user_id)。
9 | 3. 前往 [`settings.py`](settings.py) 配置账户、Chrome driver 路径、并行程度等其他信息。(目前并行没有做得很好,因为开多个 Chrome driver 的话需要重复地进行手动登录,建议 `MAX_CRAWING_USERS` 设为 1)
10 | 4. 执行 `make` 或者 `python spider.py` 开始运行爬虫。
11 |
12 | ## 注意事项
13 |
14 | 注意运行过程中可能需要手动进行登录操作,这时可以根据命令行提示进行操作。
15 |
16 | 因为一条微博的评论可以达到 20 万条甚至更多,为了节省资源(微博有爬取频次限制)、减少重复信息,对于一条微博只抓取前 200 条评论。
17 |
18 | 同时为了抓取到更广泛的信息,采用并行抓取方式,用户可自行在 `settings.py` 中设置最多并行爬取用户数。
19 |
20 | ## API
21 |
22 | 我整理成了网页,参见 https://upupming.site/weiboAPI/index.html
23 |
24 | ## 可能会遇到的问题
25 |
26 | 修复[输出重定向错误](http://blog.mathieu-leplatre.info/python-utf-8-print-fails-when-redirecting-stdout.html):
27 |
28 | ```pwsh
29 | $env:PYTHONIOENCODING = "utf_8"
30 | ```
31 |
32 | 运行(无需 Cookie):
33 |
34 | ```pwsh
35 | python.exe .\spider.py
36 | ```
37 |
--------------------------------------------------------------------------------
/src/util/config.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*
2 |
config = {
    # Bmob configuration
    # NOTE(review): these look like real credentials committed to the repository —
    # consider rotating them and loading them from the environment instead.
    'bmob': {
        'APP_ID': '366372322020724a39d8de5ccd61eeaa',
        'REST_API_KEY': '40de9f3e91287703e695fe1f6b94393a',
    },
    # Weibo configuration
    'weibo': {
        # How to obtain the cookie: go to m.weibo.cn, open the full text of a post with many comments and scroll down a few pages.
        # The request headers in Chrome's Network panel will then contain the Cookie.
        # Note: m.weibo.cn is special — viewing posts does not require login, but viewing comments does.
        # For example, open https://m.weibo.cn/detail/4389138709375153 directly and scroll through more comments; the Cookie appears in the request headers under XHR in the Network panel.
        'COOKIE': 'ALF=1564734503; SCF=AuUY2ywPv1KKDsxqBgngDXYn7XTsKn_5p4iBblRihSxO8mUlZ5DB13iaxpPOY50QQzi_qq8HXRkR0NEl6MjJ-Ts.; SUB=_2A25wGOOGDeRhGeFP4lcU9SfJzD-IHXVT4o3OrDV6PUJbktBeLW_RkW1NQO_UfEy6P_rwgaJHDE-0R3sOldFws7cD; SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9W5aERPgdESA6l4AaFTr3jGy5JpX5K-hUgL.FoMp1K-fSK.fS0e2dJLoIp7LxKML1KBLBKnLxKqL1hnLBoM7SKnRe0eRe0z0; SUHB=0laVFNbqkGjoCM; _T_WM=68656738488; WEIBOCN_FROM=1110106030; MLOGIN=1; XSRF-TOKEN=aa01d1; M_WEIBOCN_PARAMS=luicode%3D10000011%26lfid%3D1076031669879400%26uicode%3D20000061%26fid%3D4389138709375153%26oid%3D4389138709375153'
    },
    'mysql': {
        'CONNECTION': {
            'host': "localhost",
            'user': 'upupming',
            'charset': 'utf8mb4'
        }
    },
    'crawl': {
        # Used to initialise the crawl queue
        'START_USER': '2803301701',
        # Wait PERIOD seconds between two consecutive requests
        'PERIOD': 4,
        # After being blocked, wait 5 minutes before requesting again
        'FORBID_PAUSE': 300
    }
}
33 |
--------------------------------------------------------------------------------
/src/files/unicodeEmoji.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
5 |
People
6 |
😄😆😊😃☺️😏😍😘😚😳😌😆😁😉😜😝😀😗😙😛😴😟😦😧😮😬😕😯😑😒😅😓😥😩😔😞😖😨😰😣😢😭😂😲😱😫😠😡😤😪😋😷😎😵👿😈😐😶😇👽💛💙💜❤️💚💔💓💗💕💞💘💖✨⭐️🌟💫💥💥💢❗️❓❕❔💤💨💦🎶🎵🔥💩💩💩👍👍👎👎👌👊👊✊✌️👋✋✋👐☝️👇👈👉🙌🙏👆👏💪🤘🖕🚶🏃🏃👫👪👬👭💃👯🙆🙅💁🙋👰🙎🙍🙇💑💆💇💅👦👧👩👨👶👵👴👱👲👳👷👮👼👸😺😸😻😽😼🙀😿😹😾👹👺🙈🙉🙊💂💀🐾👄💋💧👂👀👃👅💌👤👥💬💭
7 |
8 |
Nature
9 |
☀️☔️☁️❄️⛄️⚡️🌀🌁🌊🐱🐶🐭🐹🐰🐺🐸🐯🐨🐻🐷🐽🐮🐗🐵🐒🐴🐎🐫🐑🐘🐼🐍🐦🐤🐥🐣🐔🐧🐢🐛🐝🐜🐞🐌🐙🐠🐟🐳🐋🐬🐄🐏🐀🐃🐅🐇🐉🐐🐓🐕🐖🐁🐂🐲🐡🐊🐪🐆🐈🐩🐾💐🌸🌷🍀🌹🌻🌺🍁🍃🍂🌿🍄🌵🌴🌲🌳🌰🌱🌼🌾🐚🌐🌞🌝🌚🌑🌒🌓🌔🌕🌖🌗🌘🌜🌛🌔🌍🌎🌏🌋🌌⛅️
10 |
Objects
11 |
🎍💝🎎🎒🎓🎏🎆🎇🎐🎑🎃👻🎅🎄🎁🔔🔕🎋🎉🎊🎈🔮💿📀💾📷📹🎥💻📺📱☎️☎️📞📟📠💽📼🔉🔈🔇📢📣⌛️⏳⏰⌚️📻📡➿🔍🔎🔓🔒🔏🔐🔑💡🔦🔆🔅🔌🔋📲✉️📫📮🛀🛁🚿🚽🔧🔩🔨💺💰💴💵💷💶💳💸📧📥📤✉️📨📯📪📬📭📦🚪🚬💣🔫🔪💊💉📄📃📑📊📈📉📜📋📆📅📇📁📂✂️📌📎✒️✏️📏📐📕📗📘📙📓📔📒📚🔖📛🔬🔭📰🏈🏀⚽️⚾️🎾🎱🏉🎳⛳️🚵🚴🏇🏂🏊🏄🎿♠️♥️♣️♦️💎💍🏆🎼🎹🎻👾🎮🃏🎴🎲🎯🀄️🎬📝📝📖🎨🎤🎧🎺🎷🎸👞👡👠💄👢👕👕👔👚👗🎽👖👘👙🎀🎩👑👒👞🌂💼👜👝👛👓🎣☕️🍵🍶🍼🍺🍻🍸🍹🍷🍴🍕🍔🍟🍗🍖🍝🍛🍤🍱🍣🍥🍙🍘🍚🍜🍲🍢🍡🥚🍞🍩🍮🍦🍨🍧🎂🍰🍪🍫🍬🍭🍯🍎🍏🍊🍋🍒🍇🍉🍓🍑🍈🍌🍐🍍🍠🍆🍅🌽
12 |
Places
13 |
🏠🏡🏫🏢🏣🏥🏦🏪🏩🏨💒⛪️🏬🏤🌇🌆🏯🏰⛺️🏭🗼🗾🗻🌄🌅🌠🗽🌉🎠🌈🎡⛲️🎢🚢🚤⛵️⛵️🚣⚓️🚀✈️🚁🚂🚊🚞🚲🚡🚟🚠🚜🚙🚘🚗🚗🚕🚖🚛🚌🚍🚨🚓🚔🚒🚑🚐🚚🚋🚉🚆🚅🚄🚈🚝🚃🚎🎫⛽️🚦🚥⚠️🚧🔰🏧🎰🚏💈♨️🏁🎌🏮🗿🎪🎭📍🚩🇯🇵🇰🇷🇨🇳🇺🇸🇫🇷🇪🇸🇮🇹🇷🇺🇬🇧🇬🇧🇩🇪
14 |
Symbols
15 |
1️⃣2️⃣3️⃣4️⃣5️⃣6️⃣7️⃣8️⃣9️⃣🔟🔢0️⃣#️⃣🔣◀️⬇️▶️⬅️🔠🔡🔤↙️↘️➡️⬆️↖️↗️⏬⏫🔽⤵️⤴️↩️↪️↔️↕️🔼🔃🔄⏪⏩ℹ️🆗🔀🔁🔂🆕🔝🆙🆒🆓🆖🎦🈁📶🈹🈴🈺🈯️🈷️🈶🈵🈚️🈸🈳🈲🈂️🚻🚹🚺🚼🚭🅿️♿️🚇🛄🉑🚾🚰🚮㊙️㊗️Ⓜ️🛂🛅🛃🉐🆑🆘🆔🚫🔞📵🚯🚱🚳🚷🚸⛔️✳️❇️✴️💟🆚📳📴💹💱♈️♉️♊️♋️♌️♍️♎️♏️♐️♑️♒️♓️⛎🔯❎🅰️🅱️🆎🅾️💠♻️🔚🔙🔛🔜🕐🕜🕙🕥🕚🕦🕛🕧🕑🕝🕒🕞🕓🕟🕔🕠🕕🕡🕖🕢🕗🕣🕘🕤💲©️®️™️❌❗️‼️⁉️⭕️✖️➕➖➗💮💯✔️☑️🔘🔗➰〰️〽️🔱▪️▫️◾️◽️◼️◻️⬛️⬜️✅🔲🔳⚫️⚪️🔴🔵🔷🔶🔹🔸🔺🔻
16 |
--------------------------------------------------------------------------------
/src/util/mysql_helper.py:
--------------------------------------------------------------------------------
1 | import mysql.connector
2 | from .config import *
3 |
# Connection opened at import time without selecting a database;
# create_table_if_not_exists() later rebinds both names to a seq2emoji connection.
MYDB = mysql.connector.connect(**config['mysql']['CONNECTION'])
MYCURSOR = MYDB.cursor()
6 |
def create_db_if_not_exists():
    """Create the ``seq2emoji`` database through the module-level cursor unless it already exists."""
    statement = 'create database if not exists seq2emoji'
    MYCURSOR.execute(statement)
def create_table_if_not_exists():
    """
    Connect to ``seq2emoji``, create the crawler tables and seed the queue.

    Rebinds the module-level ``MYDB`` / ``MYCURSOR`` to a connection with the
    ``seq2emoji`` database selected, creates the ``Crawling``, ``Crawled``,
    ``Weibo`` and ``Comment`` tables when missing, and inserts
    ``config['crawl']['START_USER']`` into ``Crawling`` when it is empty.

    Returns:
        The ``(MYDB, MYCURSOR)`` pair bound to the ``seq2emoji`` database.
    """
    global MYDB, MYCURSOR
    # NOTE: the previous connection is left open on purpose; other modules may
    # still hold a reference to it through ``from util.mysql_helper import *``.
    MYDB = mysql.connector.connect(**config['mysql']['CONNECTION'], database='seq2emoji')
    MYCURSOR = MYDB.cursor()

    # Crawling
    MYCURSOR.execute("""
        create table if not exists Crawling (
            uid varchar(255) not null unique,
            uname varchar(255) CHARACTER SET utf8mb4 collate utf8mb4_unicode_520_ci,
            primary key (uid)
        )
    """)
    # Seed the queue when nobody is being crawled yet. Only one row is
    # requested: the queue can be large and only its emptiness matters.
    MYCURSOR.execute("SELECT 1 FROM Crawling LIMIT 1")
    if not MYCURSOR.fetchall():
        MYCURSOR.execute('insert into Crawling (uid, uname) values (%s, %s)', (config['crawl']['START_USER'], None))
        MYDB.commit()
    # Crawled
    MYCURSOR.execute("""
        create table if not exists Crawled (
            uid varchar(255) not null unique,
            uname varchar(255) CHARACTER SET utf8mb4 collate utf8mb4_unicode_520_ci,
            primary key (uid)
        )
    """)
    # Weibo
    MYCURSOR.execute("""
        create table if not exists Weibo (
            uid varchar(255),
            mid varchar(255) not null unique,
            text text CHARACTER SET utf8mb4 collate utf8mb4_unicode_520_ci,
            img_emoji json,
            primary key (mid)
        )
    """)
    # Comment
    MYCURSOR.execute("""
        create table if not exists Comment (
            mid varchar(255) not null,
            cid varchar(255) not null unique,
            text text CHARACTER SET utf8mb4 collate utf8mb4_unicode_520_ci,
            img_emoji json,
            primary key (cid)
        )
    """)

    return (MYDB, MYCURSOR)
63 |
--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------
1 | # weiboAPI
2 |
3 | 搜集微博 API,详情请见下面的示例。
4 |
5 |
6 |
7 | ## userInfo
8 |
9 | `https://m.weibo.cn/api/container/getIndex?type=uid&value=1669879400`
10 |
11 |
12 |
13 |
19 |
20 | ## cards
21 |
22 | `https://m.weibo.cn/api/container/getIndex?type=uid&value=1669879400&containerid=1076031669879400`
23 |
24 |
25 |
26 |
32 |
33 | ## statuses
34 |
35 | `https://m.weibo.cn/statuses/extend?id=4383487819465288`
36 |
37 |
38 |
39 |
45 |
46 | ## retweet
47 |
48 | `https://m.weibo.cn/statuses/show?id=HyZVegYAP`
49 |
50 |
51 |
52 |
58 |
59 | ## comments1
60 |
61 | `https://m.weibo.cn/api/comments/show?id=4383183661430868&page=1`
62 |
63 |
64 |
65 |
71 |
72 | ## comments2
73 |
74 | `https://m.weibo.cn/comments/hotflow?id=4383183661430868&mid=4383183661430868&max_id_type=0`
75 |
76 |
77 |
78 |
79 |
85 |
86 |
--------------------------------------------------------------------------------
/src/emoji.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*
2 |
3 | from lxml import etree
4 | import os, json
5 | from util.Bmob import *
6 | from util.table import *
7 | import time
8 |
# Directory containing this module, with a trailing separator. The original
# passed the string literal '__file__', which resolved against the current
# working directory, so the files/ lookups only worked when run from src/.
CUR_DIR = os.path.dirname(os.path.realpath(__file__)) + os.sep
# Register the shared Bmob context used by every BmobModel / Query.
BmobSDK.setup(config["bmob"]["APP_ID"], config["bmob"]["REST_API_KEY"])
11 |
def get_img_emojis():
    """
    Parse files/imgEmoji.html and return ``(descriptions, links)``: the
    ``alt`` and ``src`` attribute values of every ``<img>`` directly inside
    a ``<span>``.
    """
    html_path = CUR_DIR + 'files/imgEmoji.html'
    with open(html_path, 'r', encoding='UTF-8') as handle:
        markup = handle.read()

    tree = etree.HTML(markup)
    descriptions = tree.xpath('//span/img/@alt')
    links = tree.xpath('//span/img/@src')
    return (descriptions, links)
25 |
def get_unicode_emojis():
    """
    Return two arrays: the English emoji descriptions and the Unicode emoji.

    In practice this does not work well, mainly because the lengths of the
    collected descriptions and emoji do not match.
    """
    data = None
    with open(CUR_DIR + 'files/unicodeEmoji.html', 'r', encoding='UTF-8') as file:
        data = file.read()

    selector = etree.HTML(data)

    # Text content of each <div class="emojis"> element.
    unicode_emoji_arr = selector.xpath('//div[@class="emojis"]/text()')

    # One description list per section; five sections are expected.
    unicode_desc_arr = [[] for i in range(5)]
    index = 0
    started = False
    with open(CUR_DIR + 'files/unicodeDesc.html', 'r', encoding='UTF-8') as file:
        for data in file:
            # A blank line toggles whether we are inside a description section.
            if data == '\n':
                started = not started
                # After a section ends, advance index by one
                if started == False and len(unicode_desc_arr[index]) > 0:
                    index += 1
            elif started and data.startswith(':'):
                # Strip the first colon, the last colon and the newline character
                unicode_desc_arr[index].append(data[1:len(data)-2])
            else:
                # Remember to reset to False when a heading such as People is encountered
                started = False
    # Print per-section lengths so mismatches between descriptions and emoji are visible.
    lens = 0
    for i in range(5):
        print(f'len of desc {i}: {len(unicode_desc_arr[i])}')
        lens += len(unicode_desc_arr[i])
        print(f'len of emoji {i}: {len(unicode_emoji_arr[i])}')
    print(lens)
    return (unicode_desc_arr, unicode_emoji_arr)
62 |
def get_unicode_emojis_from_mdit():
    """
    Load files/unicode.json and return its parsed content: a dict keyed by
    emoji description with the Unicode emoji as value.
    """
    # The file was copied from https://github.com/markdown-it/markdown-it-emoji/blob/master/lib/data/full.json
    json_path = CUR_DIR + 'files/unicode.json'
    with open(json_path, 'r', encoding='UTF-8') as handle:
        return json.load(handle)
72 |
def insert_img_emojis():
    """Save one Emoji record per image emoji, pairing each description with the link at the same index."""
    descriptions, links = get_img_emojis()
    for position, description in enumerate(descriptions):
        record = Emoji(desc=description, content=links[position])
        record.save()
78 |
def insert_unicode_emojis():
    """Save one Emoji record per entry of the description -> Unicode emoji mapping."""
    mapping = get_unicode_emojis_from_mdit()
    for description, symbol in mapping.items():
        record = Emoji(desc=description, content=symbol)
        record.save()
84 |
85 |
if __name__ == "__main__":
    # Upload the Unicode emoji first, then the image emoji.
    insert_unicode_emojis()
    insert_img_emojis()
--------------------------------------------------------------------------------
/docs/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | weiboAPI
6 |
7 |
8 |
9 |
10 |
13 |
20 |
21 |
22 |
23 |
24 | weiboAPI
25 | 搜集微博 API,详情请见下面的示例。
26 |
27 | userInfo
28 | https://m.weibo.cn/api/container/getIndex?type=uid&value=1669879400
29 |
30 |
31 |
37 | cards
38 | https://m.weibo.cn/api/container/getIndex?type=uid&value=1669879400&containerid=1076031669879400
39 |
40 |
41 |
47 | statuses
48 | https://m.weibo.cn/statuses/extend?id=4383487819465288
49 |
50 |
51 |
57 |
58 | https://m.weibo.cn/statuses/show?id=HyZVegYAP
59 |
60 |
61 |
67 |
68 | https://m.weibo.cn/api/comments/show?id=4383183661430868&page=1
69 |
70 |
71 |
77 |
78 | https://m.weibo.cn/comments/hotflow?id=4383183661430868&mid=4383183661430868&max_id_type=0
79 |
80 |
81 |
87 |
88 |
89 |
--------------------------------------------------------------------------------
/src/util/Bmob.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*
2 | '''
3 | Created on 2015年7月2日
4 |
5 | @author: RobinTang
6 |
7 | https://github.com/sintrb/Bmob-Py
8 |
9 | '''
10 | import json
11 | import copy
12 | import functools
13 | import requests
14 | from urllib import parse
15 | from .config import *
16 |
17 | def _urljoin(func):
18 | @functools.wraps(func)
19 | def _wrapper(self, resource_path, *args, **kwargs):
20 | url = self.apiurl + '/' + resource_path
21 | return func(self, url, *args, **kwargs)
22 | return _wrapper
23 |
24 |
def urlencode(params):
    '''
    URL-encode ``params``: a dict becomes a query string, a list is
    concatenated and percent-quoted, anything else is percent-quoted as is.
    '''
    if isinstance(params, dict):
        return parse.urlencode(params)
    text = ''.join(params) if isinstance(params, list) else params
    return parse.quote(text)
32 |
33 |
class BmobSDK(object):
    '''
    Bmob REST client created from an Application ID and a REST API Key.
    BmobSDK.setup() stores one instance in ``BmobSDK.context`` so the same
    application can be shared by models and queries.
    '''
    context = None

    def __init__(self, appid, restkey, apiurl='http://api.bmob.cn/1/classes'):
        '''Store the credentials and base URL, build the headers and open a session.'''
        super().__init__()
        self.appid, self.restkey, self.apiurl = appid, restkey, apiurl
        headers = {}
        headers["x-Bmob-Application-Id"] = self.appid
        headers["X-Bmob-REST-API-Key"] = self.restkey
        headers["Content-Type"] = "application/json"
        self._http_headers = headers
        # One Session is reused for all requests so connections are pooled,
        # which avoids errors from opening too many connections.
        # https://stackoverflow.com/questions/24873927/python-requests-module-and-connection-reuse
        # https://2.python-requests.org//en/latest/user/advanced/#session-objects
        self.session = requests.Session()

    @_urljoin
    def get(self, url):
        '''GET ``url`` with the Bmob headers and return the response.'''
        response = self.session.get(url, headers=self._http_headers)
        return response

    @_urljoin
    def post(self, url, **kwargs):
        '''POST the ``data`` keyword argument as JSON and return the response.'''
        payload = kwargs.get('data')
        return self.session.post(url, json=payload, headers=self._http_headers)

    @_urljoin
    def put(self, url, **kwargs):
        '''PUT the ``data`` keyword argument as JSON and return the response.'''
        payload = kwargs.get('data')
        return self.session.put(url, json=payload, headers=self._http_headers)

    @_urljoin
    def delete(self, url, **kwargs):
        '''DELETE ``url`` and return the response; keyword arguments are ignored.'''
        response = self.session.delete(url, headers=self._http_headers)
        return response

    @staticmethod
    def setup(appid, restkey):
        '''Create a BmobSDK and install it as the shared ``BmobSDK.context``.'''
        shared = BmobSDK(appid, restkey)
        BmobSDK.context = shared
73 |
74 |
class Query(object):
    '''
    Bmob Query: collects ``where`` / ``order`` / ``limit`` / ``skip``
    parameters for a model class and runs them through a BmobSDK context.

    Every builder method (``order``, ``limit``, ``skip`` and the ``w_*``
    family) updates this query in place and returns a deep copy of it.
    '''

    def __init__(self, clz, context=None):
        '''
        clz: model class; its ``__name__`` is used as the resource path and
             it is instantiated with each result row.
        context: object exposing ``get(path)``; defaults to BmobSDK.context.

        Raises BaseException when no context is available.
        '''
        super(Query, self).__init__()
        if not context:
            context = BmobSDK.context
        if not context:
            raise BaseException("No BmobSDK context setuped!")
        self.context = context
        self.clz = clz
        self.q = {}  # top-level query parameters
        self.w = {}  # where
        self.items = None  # results of the last exec_query(); None until it has run

    def copy(self):
        '''Return a new Query with deep copies of the parameters (results are not copied).'''
        q = Query(self.clz, self.context)
        q.q = copy.deepcopy(self.q)
        q.w = copy.deepcopy(self.w)
        return q

    def get_urlencode(self):
        '''Sync the ``where`` parameter with ``self.w`` and return the URL-encoded query.'''
        if self.w:
            self.q['where'] = json.dumps(self.w)
        elif 'where' in self.q:
            del self.q['where']
        return urlencode(self.q)

    def order(self, o):
        '''Set the ``order`` parameter.'''
        self.q['order'] = o
        return self.copy()

    def limit(self, l):
        '''Set the ``limit`` parameter.'''
        self.q['limit'] = l
        return self.copy()

    def skip(self, s):
        '''Set the ``skip`` parameter.'''
        self.q['skip'] = s
        return self.copy()

    def count(self):
        '''Return the number of cached results, or ask the server for the count.'''
        if self.items is not None:
            return len(self.items)
        # limit=0 together with count=1 requests only the count, no rows.
        self.q['limit'] = 0
        self.q['count'] = 1
        path = '/'.join([self.clz.__name__, '?' + self.get_urlencode()])
        return self.context.get(path).json()['count']

    def get_kw(self, k):
        '''Return the operator dict stored for field ``k``, creating it when missing.'''
        if k in self.w:
            return self.w[k]
        self.w[k] = {}
        return self.w[k]

    def w_eq(self, k, v):
        '''equal'''
        self.w[k] = v
        return self.copy()

    def w_lt(self, k, v):
        '''less than'''
        self.get_kw(k)['$lt'] = v
        return self.copy()

    def w_lte(self, k, v):
        '''less than or equal'''
        self.get_kw(k)['$lte'] = v
        return self.copy()

    def w_gt(self, k, v):
        '''greater than'''
        # Fixed: the original called the nonexistent ``geet_kw`` and raised AttributeError.
        self.get_kw(k)['$gt'] = v
        return self.copy()

    def w_gte(self, k, v):
        '''greater than or equal'''
        self.get_kw(k)['$gte'] = v
        return self.copy()

    def w_ne(self, k, v):
        '''not equal'''
        self.get_kw(k)['$ne'] = v
        return self.copy()

    def w_in(self, k, v):
        '''in'''
        self.get_kw(k)['$in'] = v
        return self.copy()

    def w_nin(self, k, v):
        '''not in'''
        self.get_kw(k)['$nin'] = v
        return self.copy()

    def w_exists(self, k, v):
        '''add an ``$exists`` condition'''
        self.get_kw(k)['$exists'] = v
        return self.copy()

    def w_select(self, k, v):
        '''add a ``$select`` condition'''
        self.get_kw(k)['$select'] = v
        return self.copy()

    def w_dontSelect(self, k, v):
        '''add a ``$dontSelect`` condition'''
        self.get_kw(k)['$dontSelect'] = v
        return self.copy()

    def w_all(self, k, v):
        '''add an ``$all`` condition'''
        self.get_kw(k)['$all'] = v
        return self.copy()

    def w_regex(self, k, v):
        '''add a ``$regex`` condition'''
        self.get_kw(k)['$regex'] = v
        return self.copy()

    def exec_query(self):
        '''Run the query, cache the resulting model instances and return them.'''
        path = '/'.join([self.clz.__name__, '?' + self.get_urlencode()])
        self.items = [self.clz(**r) for r in self.context.get(path).json()['results']]
        return self.items

    def first(self):
        '''Return the first matching instance, or None when nothing matches.'''
        q = self.copy()
        q.limit(1)
        rs = q.exec_query()
        # A conditional expression also returns a first result that is falsy.
        return rs[0] if rs else None

    def __getslice__(self, s, e):
        '''Python 2 slice hook; kept for callers that invoke it directly.'''
        if self.items is None:
            self.exec_query()
        # Python 3 lists have no __getslice__, so slice the list directly.
        return self.items[s:e]

    def __iter__(self):
        '''Iterate over the results, running the query on first use.'''
        if self.items is None:
            self.exec_query()
        return iter(self.items)

    def __getitem__(self, k):
        '''Index or slice the results, running the query on first use.'''
        if self.items is None:
            self.exec_query()
        return self.items[k]

    def __len__(self):
        '''Same as count().'''
        return self.count()
224 |
225 |
class BmobModel(object):
    '''
    Basic Bmob model, all other Bmob model must inherit this class.
    Subclasses declare their fields as class attributes; the type of each
    class-level value is used to coerce instance values before upload.
    '''

    def __init__(self, context=None, objectId=None, **kwargs):
        '''
        Build a model from keyword fields, or load it from the server when an
        objectId is given. A string passed as the first positional argument
        is treated as the objectId.

        Raises BaseException when no context is available.
        '''
        super().__init__()
        # check objectId
        if isinstance(context, str):
            context, objectId = None, context

        context = context or BmobSDK.context
        if not context:
            raise BaseException("No BmobSDK context setuped!")
        self.context = context
        self.objectId = objectId
        if self.objectId:
            # get object by id
            path = '/'.join([self.get_modelname(), self.objectId])
            fields = self.context.get(path).json()
        else:
            fields = kwargs
        for name, value in fields.items():
            setattr(self, name, value)

    def get_attrs(self):
        '''Return the names in the class dict that do not start with a double underscore.'''
        return [name for name in type(self).__dict__ if not name.startswith('__')]

    def get_dict(self):
        '''
        Return ``{field: value}`` for every field whose class-level default
        has a supported type, coercing each value to that type.
        '''
        model = type(self)
        supported = (int, float, str, tuple, list, dict, set)
        payload = {}
        for name in self.get_attrs():
            caster = type(getattr(model, name))
            if caster in supported:
                payload[name] = caster(getattr(self, name))
        return payload

    def get_modelname(self):
        '''Return the class name, which is used as the resource path.'''
        return type(self).__name__

    def save(self):
        '''PUT the fields when the object has an id, POST them otherwise, then copy the response fields onto self.'''
        data = self.get_dict()
        if self.objectId:
            path = '/'.join([self.get_modelname(), self.objectId])
            response = self.context.put(path, data=data)
        else:
            response = self.context.post(self.get_modelname(), data=data)
        for name, value in response.json().items():
            setattr(self, name, value)

    def delete(self):
        '''
        Delete the object on the server. Returns True when there is nothing
        to delete or the server answers ``ok``; clears objectId on success.
        '''
        if not self.objectId:
            return True
        path = '/'.join([self.get_modelname(), self.objectId])
        deleted = self.context.delete(path).json()['msg'] == 'ok'
        if deleted:
            self.objectId = None
        return deleted

    def query(self):
        '''Return a new Query for this model class.'''
        return Query(type(self))
291 |
292 |
293 |
--------------------------------------------------------------------------------
/src/main.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*
2 |
3 | import os, logging, time, signal, sys, requests, traceback, json
4 | from util.mysql_helper import *
5 | from collections import deque
6 | from lxml import etree
7 |
# Used to obtain the containerid
INFO_URL = 'https://m.weibo.cn/api/container/getIndex?type=uid&value={}'
# Note that each user's Weibo containerid is different
WEIBO_URL = 'https://m.weibo.cn/api/container/getIndex?containerid={}&page={}'
LONG_WEIBO_URL = 'https://m.weibo.cn/statuses/extend?id={}'
COMMENT_URL = 'https://m.weibo.cn/api/comments/show?id={}&page={}'
FOLLOWING_URL = 'https://m.weibo.cn/api/container/getIndex?containerid=231051_-_followers_-_{}&page={}'
15 |
16 |
17 | class WBSpider():
18 |
def init_logging(self, name='crawling', log_level=logging.INFO):
    """
    Route the root logger to ``log/<name>-<LEVEL>.log`` and return the handler.

    The log directory is created when missing, the file is opened in write
    mode, and every handler previously attached to the root logger is removed.
    """
    # NOTE: '__file__' is a string literal here, so the directory resolves
    # against the current working directory.
    log_dir = os.path.dirname(os.path.realpath('__file__')) + "/log"
    # Create the directory automatically when it does not exist
    os.makedirs(log_dir, exist_ok=True)

    level_name = logging.getLevelName(log_level)
    handler = logging.FileHandler(log_dir + f'/{name}-{level_name}.log', 'w', encoding='utf-8')
    handler.setFormatter(
        logging.Formatter("%(asctime)s;%(levelname)s;%(message)s", "%Y-%m-%d %H:%M:%S")
    )

    root = logging.getLogger()  # root logger
    for stale in list(root.handlers):  # remove all old handlers
        root.removeHandler(stale)
    root.addHandler(handler)  # set the new handler
    root.setLevel(log_level)

    return handler
36 |
def fetch_table(self, table='Crawling'):
    """Return every row of ``table`` as a list of ``{column name: value}`` dicts."""
    self.MYCURSOR.execute(f'SELECT * FROM {table}')
    names = []
    for column in self.MYCURSOR.description:
        names.append(column[0])
    records = []
    for record in self.MYCURSOR.fetchall():
        records.append(dict(zip(names, record)))
    return records
41 |
def sel_from_table(self, table, key, value):
    """
    Return the rows of ``table`` whose column ``key`` equals ``value`` as a
    list of ``{column name: value}`` dicts.

    ``value`` is bound as a query parameter rather than interpolated into the
    SQL text, so quotes in it can neither break nor alter the statement. It
    is passed as a string, matching the quoted literal used before.
    ``table`` and ``key`` are identifiers and must come from trusted code.
    """
    self.MYCURSOR.execute(f"SELECT * FROM {table} WHERE {key} = %s", (str(value),))
    columns = [col[0] for col in self.MYCURSOR.description]
    return [dict(zip(columns, row)) for row in self.MYCURSOR.fetchall()]
46 |
def del_from_table(self, table, key, value):
    """
    Delete the rows of ``table`` whose column ``key`` equals ``value`` and commit.

    ``value`` is bound as a query parameter rather than interpolated into the
    SQL text, so quotes in it can neither break nor alter the statement. It
    is passed as a string, matching the quoted literal used before.
    ``table`` and ``key`` are identifiers and must come from trusted code.
    """
    self.MYCURSOR.execute(f"DELETE FROM {table} WHERE {key} = %s", (str(value),))
    self.MYDB.commit()
def ins_to_table(self, table, data_dict):
    """
    Insert one row into ``table`` and commit.

    ``data_dict`` maps column names to values; list values are stored as
    their JSON text. The caller's dict is left untouched (the original
    replaced its lists with JSON strings in place). A duplicate-key insert
    is skipped silently.
    """
    columns = ', '.join(data_dict)
    placeholders = ', '.join(['%s'] * len(data_dict))
    sql = "INSERT INTO %s ( %s ) VALUES ( %s )" % (table, columns, placeholders)
    values = [
        json.dumps(value) if isinstance(value, list) else value
        for value in data_dict.values()
    ]
    try:
        self.MYCURSOR.execute(sql, values)
        self.MYDB.commit()
    except mysql.connector.errors.IntegrityError:
        # Skip rows that were already inserted
        pass
63 |
def init_crawl(self):
    """Load the Crawling and Crawled tables into the ``crawling`` / ``crawled`` deques."""
    # Queue of users to crawl; breadth-first search is used
    pending_rows = self.fetch_table()
    self.crawling = deque(pending_rows)
    finished_rows = self.fetch_table('Crawled')
    self.crawled = deque(finished_rows)
def save_crawl_to_bmob(self):
    """
    Call ``save()`` on every item of ``self.crawling`` and ``self.crawled``.

    NOTE(review): init_crawl() fills both queues with plain dicts, which have
    no ``save()`` — confirm the items are converted to Bmob models first.
    """
    for crawling_item in self.crawling:
        crawling_item.save()
    # Fixed: the original iterated the undefined bare name ``crawled``.
    for crawled_item in self.crawled:
        crawled_item.save()
73 |
def init_session(self):
    """Create ``self.session`` and load the configured Weibo cookie string into its cookie jar."""
    self.session = requests.Session()
    raw_cookie = config['weibo']['COOKIE']
    # Each "name=value" pair is split on its first "=" only, since values may contain "=".
    cookies_dict = dict(pair.split("=", 1) for pair in raw_cookie.split("; "))
    # https://blog.csdn.net/mgxcool/article/details/52663382
    requests.utils.add_dict_to_cookiejar(self.session.cookies, cookies_dict)
79 |
def init_mysql(self):
    """Create the database and tables, then keep the returned connection and cursor on ``self``."""
    create_db_if_not_exists()
    connection, cursor = create_table_if_not_exists()
    self.MYDB = connection
    self.MYCURSOR = cursor
83 |
def __init__(self):
    """Set up logging, the MySQL tables, the crawl queues and the HTTP session, in that order."""
    # Logging goes first so the messages below reach the log file.
    self.init_logging()
    logging.info('正在初始化数据库...')
    self.init_mysql()
    logging.info('正在初始化爬取队列...')
    # init_crawl reads the tables through the cursor that init_mysql stored on self.
    self.init_crawl()
    self.init_session()
91 |
def get_data(self, url):
    """
    GET ``url`` through the shared session and return the decoded JSON.

    Sleeps ``config['crawl']['PERIOD']`` seconds before every request. When
    the response carries the "too many requests" message, it sleeps
    ``config['crawl']['FORBID_PAUSE']`` seconds and requests again, repeating
    until a normal response arrives.

    The original retry path crashed: it called the misspelled
    ``logging.warnng`` and then the undefined bare name ``get_data``. A loop
    replaces the recursion, so a long ban cannot exhaust the stack.
    """
    while True:
        # Wait a few seconds before each request so we are not blocked for going too fast
        time.sleep(config['crawl']['PERIOD'])
        res = self.session.get(url).json()
        if not ('msg' in res and res['msg'] == '请求过于频繁,歇歇吧'):
            return res
        logging.warning(f"当前请求过于频繁,等待 {config['crawl']['FORBID_PAUSE']} 秒")
        time.sleep(config['crawl']['FORBID_PAUSE'])
        logging.warning(f'等待完毕,重新请求')
102 |
def crawl_user_following(self, uid):
    """
    Return everyone ``uid`` follows as a list of ``{'uid': ..., 'uname': ...}`` dicts.

    Pages are requested one after another until a page has no cards. Any
    error is logged with its traceback and an empty list is returned.
    """
    try:
        result = []
        cur_page = 1
        while True:
            logging.info(f'正在爬取 {uid} 的第 {cur_page} 页的 following')
            # https://m.weibo.cn/api/container/getIndex?containerid=231051_-_followers_-_1669879400&page=0
            url = FOLLOWING_URL.format(uid, cur_page)
            data = self.get_data(url)
            cards = data['data']['cards']
            if len(cards) == 0:
                logging.info(f'用户 {uid} 的 following 爬取完毕')
                # This summary sat after the endless loop and was never reached.
                logging.info(f'将新增加 {len(result)} 个 following 到队列中')
                return result

            for card in cards:
                for card_group_item in card['card_group']:
                    # Only card type 10 is a real entry of the following list
                    if card_group_item['card_type'] != 10:
                        continue
                    user = card_group_item['user']
                    result.append({'uid': user['id'], 'uname': user['screen_name']})
            cur_page += 1
    except Exception:
        # ``Exception`` rather than a bare except, so Ctrl-C and SystemExit
        # still stop the crawler.
        logging.error('following 抓取出错')
        logging.error(traceback.format_exc())
        return []
132 |
def get_weibo_containerid(self, uid):
    """
    Return the containerid of the second tab in the profile info of ``uid``.

    Returns None when the request fails or the response lacks the expected
    fields; the error, traceback and last response are logged.
    """
    # Bound before the try so the error log below cannot raise
    # UnboundLocalError when the request itself fails.
    data = None
    try:
        # https://m.weibo.cn/api/container/getIndex?type=uid&value=1669879400
        url = INFO_URL.format(uid)
        data = self.get_data(url)
        return data['data']['tabsInfo']['tabs'][1]['containerid']
    except Exception:
        # ``Exception`` rather than a bare except, so Ctrl-C and SystemExit
        # still stop the crawler.
        logging.error('containerid 抓取出错')
        logging.error(traceback.format_exc())
        logging.error(data)
        return None
def crawl_user_weibo(self, uid):
    """
    Crawl all posts of ``uid`` and store them in the Weibo table.

    Reposts and non-post cards are skipped. For every stored post its
    comments are crawled as well. Paging stops at the first page without
    cards. Any error is logged with its traceback and the last response.
    """
    # Bound before the try so the error log below cannot raise
    # UnboundLocalError when the first request fails.
    data = None
    try:
        containerid = self.get_weibo_containerid(uid)
        if containerid is None:
            # The lookup already logged the failure; requesting
            # "containerid=None" pages would be pointless.
            return
        cur_page = 1
        while True:
            logging.info(f'正在爬取 {uid} 的第 {cur_page} 页微博')
            # https://m.weibo.cn/api/container/getIndex?containerid=1076031669879400&page=0
            url = WEIBO_URL.format(containerid, cur_page)
            data = self.get_data(url)
            cards = data['data']['cards']
            if len(cards) == 0:
                logging.info(f'用户 {uid} 爬取完毕')
                return

            for card in cards:
                # Ignore ads and other non-post cards
                if card["card_type"] != 9:
                    continue
                mblog = card["mblog"]
                # Ignore reposts
                if "retweeted_status" in mblog:
                    continue

                selector = etree.HTML(mblog["text"])
                # Convert the HTML to plain text
                # See https://www.zybuluo.com/Alston/note/778377
                text = etree.tostring(selector, method="text", encoding="UTF-8").decode('utf-8')
                img_emoji = selector.xpath("//span/img/@alt")

                weibo = {'uid': uid, 'text': text, 'mid': mblog['mid'], 'img_emoji': img_emoji}
                self.ins_to_table('Weibo', weibo)

                # Crawl the comments
                self.crawl_weibo_comments(mblog['mid'])

            cur_page += 1
    except Exception:
        # ``Exception`` rather than a bare except, so Ctrl-C and SystemExit
        # still stop the crawler.
        logging.error('微博抓取出错')
        logging.error(traceback.format_exc())
        logging.error(data)
186 |
def crawl_weibo_comments(self, mid, max=10):
    """
    Crawl up to ``max`` pages of comments of the post ``mid`` and store
    every comment in the ``Comment`` table, tagged with that ``mid``.

    Args:
        mid: unique id of the post whose comments are crawled.
        max: maximum number of comment pages to request (default 10).

    Crawling stops early when the response message says there is no more
    data. Errors are logged; nothing is returned.
    """
    # Bound before the try so the except handler can always log it.
    data = None
    try:
        for cur_page in range(1, max + 1):
            logging.info(f'正在抓取 {mid} 的第 {cur_page} 页评论')
            # e.g. https://m.weibo.cn/api/comments/show?id=4384122253963002&page=0
            url = COMMENT_URL.format(mid, cur_page)
            data = self.get_data(url)
            # 'msg' is not guaranteed to be present, so use .get() rather
            # than failing the whole crawl with a KeyError.
            if data.get('msg') == '暂无数据':
                break
            for comment in data['data']['data']:
                selector = etree.HTML(comment["text"])
                text = etree.tostring(selector, method="text", encoding="UTF-8").decode('utf-8')
                img_emoji = selector.xpath("//span/img/@alt")

                row = {'cid': comment["id"], 'mid': mid, 'text': text, 'img_emoji': img_emoji}
                self.ins_to_table('Comment', row)
        logging.info(f'微博 {mid} 爬取完毕')
    except Exception:
        # Exception (not a bare except) so SystemExit from the Ctrl+C
        # handler still terminates the program.
        logging.error('评论抓取出错')
        logging.error(traceback.format_exc())
        logging.error(data)
215 |
def crawl(self, uid):
    """
    Crawl the user identified by ``uid``: first the user's posts, then the
    user's following list.

    Returns whatever ``crawl_user_following`` returns for that user (the
    users this account follows).
    """
    self.crawl_user_weibo(uid)
    following = self.crawl_user_following(uid)
    return following
223 |
def startBFS(self):
    """
    Start crawling with a breadth-first search over the following graph.

    Users are taken from the left of ``self.crawling``; after a user has
    been crawled it is moved from the ``Crawling`` table to the ``Crawled``
    table and every user it follows that is neither crawled nor already
    queued is appended to the queue and the ``Crawling`` table.

    Exits the process with status -1 if ``crawl`` returns ``None``.
    """
    # Terminates in theory; in practice the graph is too large for that.
    while len(self.crawling) > 0:
        crawling_user = self.crawling.popleft()
        adj_arr = self.crawl(crawling_user['uid'])
        if adj_arr is None:
            logging.error('不正常终止')
            # sys.exit instead of the site-provided exit() helper, which is
            # not guaranteed to exist outside interactive sessions.
            sys.exit(-1)
        self.del_from_table('Crawling', 'uid', crawling_user['uid'])
        logging.info(f"{crawling_user['uid']}-{crawling_user['uname']} 已从 Crawling 队列和数据库中移除")
        self.ins_to_table('Crawled', crawling_user)
        logging.info(f'{crawling_user["uid"]}-{crawling_user["uname"]} 已加入到 Crawled 队列和数据库中')
        for v in adj_arr:
            # Skip users that were already crawled ...
            if len(self.sel_from_table('Crawled', 'uid', v['uid'])) != 0:
                continue
            # ... and users already waiting in the frontier; without this
            # check a user followed by several accounts is queued (and later
            # crawled) once per follower.
            if len(self.sel_from_table('Crawling', 'uid', v['uid'])) != 0:
                continue
            crawling_user_new = {'uid': v['uid'], 'uname': v['uname']}
            self.ins_to_table('Crawling', crawling_user_new)
            self.crawling.append(crawling_user_new)
            logging.info(f"{v['uid']}-{v['uname']} 已加入到 Crawling 队列和数据库中")
246 |
def signal_handler(sig, frame):
    """Signal handler: print a notice and exit the process with status 0.

    ``sig`` and ``frame`` are the arguments the ``signal`` module passes to
    handlers; neither is used.
    """
    print('You pressed Ctrl+C!')
    raise SystemExit(0)
if __name__ == "__main__":
    # Route SIGINT (Ctrl+C) to signal_handler before the crawl starts.
    signal.signal(signal.SIGINT, signal_handler)

    # Build the spider and run the breadth-first crawl.
    WBSpider().startBFS()
255 |
--------------------------------------------------------------------------------
/src/files/unicodeDesc.html:
--------------------------------------------------------------------------------
1 |
2 |
3 | People
4 |
5 | :bowtie:
6 | :smile:
7 | :laughing:
8 | :blush:
9 | :smiley:
10 | :relaxed:
11 | :smirk:
12 | :heart_eyes:
13 | :kissing_heart:
14 | :kissing_closed_eyes:
15 | :flushed:
16 | :relieved:
17 | :satisfied:
18 | :grin:
19 | :wink:
20 | :stuck_out_tongue_winking_eye:
21 | :stuck_out_tongue_closed_eyes:
22 | :grinning:
23 | :kissing:
24 | :kissing_smiling_eyes:
25 | :stuck_out_tongue:
26 | :sleeping:
27 | :worried:
28 | :frowning:
29 | :anguished:
30 | :open_mouth:
31 | :grimacing:
32 | :confused:
33 | :hushed:
34 | :expressionless:
35 | :unamused:
36 | :sweat_smile:
37 | :sweat:
38 | :disappointed_relieved:
39 | :weary:
40 | :pensive:
41 | :disappointed:
42 | :confounded:
43 | :fearful:
44 | :cold_sweat:
45 | :persevere:
46 | :cry:
47 | :sob:
48 | :joy:
49 | :astonished:
50 | :scream:
51 | :neckbeard:
52 | :tired_face:
53 | :angry:
54 | :rage:
55 | :triumph:
56 | :sleepy:
57 | :yum:
58 | :mask:
59 | :sunglasses:
60 | :dizzy_face:
61 | :imp:
62 | :smiling_imp:
63 | :neutral_face:
64 | :no_mouth:
65 | :innocent:
66 | :alien:
67 | :yellow_heart:
68 | :blue_heart:
69 | :purple_heart:
70 | :heart:
71 | :green_heart:
72 | :broken_heart:
73 | :heartbeat:
74 | :heartpulse:
75 | :two_hearts:
76 | :revolving_hearts:
77 | :cupid:
78 | :sparkling_heart:
79 | :sparkles:
80 | :star:
81 | :star2:
82 | :dizzy:
83 | :boom:
84 | :collision:
85 | :anger:
86 | :exclamation:
87 | :question:
88 | :grey_exclamation:
89 | :grey_question:
90 | :zzz:
91 | :dash:
92 | :sweat_drops:
93 | :notes:
94 | :musical_note:
95 | :fire:
96 | :hankey:
97 | :poop:
98 | :shit:
99 | :+1:
100 | :thumbsup:
101 | :-1:
102 | :thumbsdown:
103 | :ok_hand:
104 | :punch:
105 | :facepunch:
106 | :fist:
107 | :v:
108 | :wave:
109 | :hand:
110 | :raised_hand:
111 | :open_hands:
112 | :point_up:
113 | :point_down:
114 | :point_left:
115 | :point_right:
116 | :raised_hands:
117 | :pray:
118 | :point_up_2:
119 | :clap:
120 | :muscle:
121 | :metal:
122 | :fu:
123 | :walking:
124 | :runner:
125 | :running:
126 | :couple:
127 | :family:
128 | :two_men_holding_hands:
129 | :two_women_holding_hands:
130 | :dancer:
131 | :dancers:
132 | :ok_woman:
133 | :no_good:
134 | :information_desk_person:
135 | :raising_hand:
136 | :bride_with_veil:
137 | :person_with_pouting_face:
138 | :person_frowning:
139 | :bow:
140 | :couple_with_heart:
141 | :massage:
142 | :haircut:
143 | :nail_care:
144 | :boy:
145 | :girl:
146 | :woman:
147 | :man:
148 | :baby:
149 | :older_woman:
150 | :older_man:
151 | :person_with_blond_hair:
152 | :man_with_gua_pi_mao:
153 | :man_with_turban:
154 | :construction_worker:
155 | :cop:
156 | :angel:
157 | :princess:
158 | :smiley_cat:
159 | :smile_cat:
160 | :heart_eyes_cat:
161 | :kissing_cat:
162 | :smirk_cat:
163 | :scream_cat:
164 | :crying_cat_face:
165 | :joy_cat:
166 | :pouting_cat:
167 | :japanese_ogre:
168 | :japanese_goblin:
169 | :see_no_evil:
170 | :hear_no_evil:
171 | :speak_no_evil:
172 | :guardsman:
173 | :skull:
174 | :feet:
175 | :lips:
176 | :kiss:
177 | :droplet:
178 | :ear:
179 | :eyes:
180 | :nose:
181 | :tongue:
182 | :love_letter:
183 | :bust_in_silhouette:
184 | :busts_in_silhouette:
185 | :speech_balloon:
186 | :thought_balloon:
187 | :feelsgood:
188 | :finnadie:
189 | :goberserk:
190 | :godmode:
191 | :hurtrealbad:
192 | :rage1:
193 | :rage2:
194 | :rage3:
195 | :rage4:
196 | :suspect:
197 | :trollface:
198 |
199 | Nature
200 |
201 | :sunny:
202 | :umbrella:
203 | :cloud:
204 | :snowflake:
205 | :snowman:
206 | :zap:
207 | :cyclone:
208 | :foggy:
209 | :ocean:
210 | :cat:
211 | :dog:
212 | :mouse:
213 | :hamster:
214 | :rabbit:
215 | :wolf:
216 | :frog:
217 | :tiger:
218 | :koala:
219 | :bear:
220 | :pig:
221 | :pig_nose:
222 | :cow:
223 | :boar:
224 | :monkey_face:
225 | :monkey:
226 | :horse:
227 | :racehorse:
228 | :camel:
229 | :sheep:
230 | :elephant:
231 | :panda_face:
232 | :snake:
233 | :bird:
234 | :baby_chick:
235 | :hatched_chick:
236 | :hatching_chick:
237 | :chicken:
238 | :penguin:
239 | :turtle:
240 | :bug:
241 | :honeybee:
242 | :ant:
243 | :beetle:
244 | :snail:
245 | :octopus:
246 | :tropical_fish:
247 | :fish:
248 | :whale:
249 | :whale2:
250 | :dolphin:
251 | :cow2:
252 | :ram:
253 | :rat:
254 | :water_buffalo:
255 | :tiger2:
256 | :rabbit2:
257 | :dragon:
258 | :goat:
259 | :rooster:
260 | :dog2:
261 | :pig2:
262 | :mouse2:
263 | :ox:
264 | :dragon_face:
265 | :blowfish:
266 | :crocodile:
267 | :dromedary_camel:
268 | :leopard:
269 | :cat2:
270 | :poodle:
271 | :paw_prints:
272 | :bouquet:
273 | :cherry_blossom:
274 | :tulip:
275 | :four_leaf_clover:
276 | :rose:
277 | :sunflower:
278 | :hibiscus:
279 | :maple_leaf:
280 | :leaves:
281 | :fallen_leaf:
282 | :herb:
283 | :mushroom:
284 | :cactus:
285 | :palm_tree:
286 | :evergreen_tree:
287 | :deciduous_tree:
288 | :chestnut:
289 | :seedling:
290 | :blossom:
291 | :ear_of_rice:
292 | :shell:
293 | :globe_with_meridians:
294 | :sun_with_face:
295 | :full_moon_with_face:
296 | :new_moon_with_face:
297 | :new_moon:
298 | :waxing_crescent_moon:
299 | :first_quarter_moon:
300 | :waxing_gibbous_moon:
301 | :full_moon:
302 | :waning_gibbous_moon:
303 | :last_quarter_moon:
304 | :waning_crescent_moon:
305 | :last_quarter_moon_with_face:
306 | :first_quarter_moon_with_face:
307 | :moon:
308 | :earth_africa:
309 | :earth_americas:
310 | :earth_asia:
311 | :volcano:
312 | :milky_way:
313 | :partly_sunny:
314 | :octocat:
315 | :squirrel:
316 |
317 | Objects
318 |
319 | :bamboo:
320 | :gift_heart:
321 | :dolls:
322 | :school_satchel:
323 | :mortar_board:
324 | :flags:
325 | :fireworks:
326 | :sparkler:
327 | :wind_chime:
328 | :rice_scene:
329 | :jack_o_lantern:
330 | :ghost:
331 | :santa:
332 | :christmas_tree:
333 | :gift:
334 | :bell:
335 | :no_bell:
336 | :tanabata_tree:
337 | :tada:
338 | :confetti_ball:
339 | :balloon:
340 | :crystal_ball:
341 | :cd:
342 | :dvd:
343 | :floppy_disk:
344 | :camera:
345 | :video_camera:
346 | :movie_camera:
347 | :computer:
348 | :tv:
349 | :iphone:
350 | :phone:
351 | :telephone:
352 | :telephone_receiver:
353 | :pager:
354 | :fax:
355 | :minidisc:
356 | :vhs:
357 | :sound:
358 | :speaker:
359 | :mute:
360 | :loudspeaker:
361 | :mega:
362 | :hourglass:
363 | :hourglass_flowing_sand:
364 | :alarm_clock:
365 | :watch:
366 | :radio:
367 | :satellite:
368 | :loop:
369 | :mag:
370 | :mag_right:
371 | :unlock:
372 | :lock:
373 | :lock_with_ink_pen:
374 | :closed_lock_with_key:
375 | :key:
376 | :bulb:
377 | :flashlight:
378 | :high_brightness:
379 | :low_brightness:
380 | :electric_plug:
381 | :battery:
382 | :calling:
383 | :email:
384 | :mailbox:
385 | :postbox:
386 | :bath:
387 | :bathtub:
388 | :shower:
389 | :toilet:
390 | :wrench:
391 | :nut_and_bolt:
392 | :hammer:
393 | :seat:
394 | :moneybag:
395 | :yen:
396 | :dollar:
397 | :pound:
398 | :euro:
399 | :credit_card:
400 | :money_with_wings:
401 | :e-mail:
402 | :inbox_tray:
403 | :outbox_tray:
404 | :envelope:
405 | :incoming_envelope:
406 | :postal_horn:
407 | :mailbox_closed:
408 | :mailbox_with_mail:
409 | :mailbox_with_no_mail:
410 | :package:
411 | :door:
412 | :smoking:
413 | :bomb:
414 | :gun:
415 | :hocho:
416 | :pill:
417 | :syringe:
418 | :page_facing_up:
419 | :page_with_curl:
420 | :bookmark_tabs:
421 | :bar_chart:
422 | :chart_with_upwards_trend:
423 | :chart_with_downwards_trend:
424 | :scroll:
425 | :clipboard:
426 | :calendar:
427 | :date:
428 | :card_index:
429 | :file_folder:
430 | :open_file_folder:
431 | :scissors:
432 | :pushpin:
433 | :paperclip:
434 | :black_nib:
435 | :pencil2:
436 | :straight_ruler:
437 | :triangular_ruler:
438 | :closed_book:
439 | :green_book:
440 | :blue_book:
441 | :orange_book:
442 | :notebook:
443 | :notebook_with_decorative_cover:
444 | :ledger:
445 | :books:
446 | :bookmark:
447 | :name_badge:
448 | :microscope:
449 | :telescope:
450 | :newspaper:
451 | :football:
452 | :basketball:
453 | :soccer:
454 | :baseball:
455 | :tennis:
456 | :8ball:
457 | :rugby_football:
458 | :bowling:
459 | :golf:
460 | :mountain_bicyclist:
461 | :bicyclist:
462 | :horse_racing:
463 | :snowboarder:
464 | :swimmer:
465 | :surfer:
466 | :ski:
467 | :spades:
468 | :hearts:
469 | :clubs:
470 | :diamonds:
471 | :gem:
472 | :ring:
473 | :trophy:
474 | :musical_score:
475 | :musical_keyboard:
476 | :violin:
477 | :space_invader:
478 | :video_game:
479 | :black_joker:
480 | :flower_playing_cards:
481 | :game_die:
482 | :dart:
483 | :mahjong:
484 | :clapper:
485 | :memo:
486 | :pencil:
487 | :book:
488 | :art:
489 | :microphone:
490 | :headphones:
491 | :trumpet:
492 | :saxophone:
493 | :guitar:
494 | :shoe:
495 | :sandal:
496 | :high_heel:
497 | :lipstick:
498 | :boot:
499 | :shirt:
500 | :tshirt:
501 | :necktie:
502 | :womans_clothes:
503 | :dress:
504 | :running_shirt_with_sash:
505 | :jeans:
506 | :kimono:
507 | :bikini:
508 | :ribbon:
509 | :tophat:
510 | :crown:
511 | :womans_hat:
512 | :mans_shoe:
513 | :closed_umbrella:
514 | :briefcase:
515 | :handbag:
516 | :pouch:
517 | :purse:
518 | :eyeglasses:
519 | :fishing_pole_and_fish:
520 | :coffee:
521 | :tea:
522 | :sake:
523 | :baby_bottle:
524 | :beer:
525 | :beers:
526 | :cocktail:
527 | :tropical_drink:
528 | :wine_glass:
529 | :fork_and_knife:
530 | :pizza:
531 | :hamburger:
532 | :fries:
533 | :poultry_leg:
534 | :meat_on_bone:
535 | :spaghetti:
536 | :curry:
537 | :fried_shrimp:
538 | :bento:
539 | :sushi:
540 | :fish_cake:
541 | :rice_ball:
542 | :rice_cracker:
543 | :rice:
544 | :ramen:
545 | :stew:
546 | :oden:
547 | :dango:
548 | :egg:
549 | :bread:
550 | :doughnut:
551 | :custard:
552 | :icecream:
553 | :ice_cream:
554 | :shaved_ice:
555 | :birthday:
556 | :cake:
557 | :cookie:
558 | :chocolate_bar:
559 | :candy:
560 | :lollipop:
561 | :honey_pot:
562 | :apple:
563 | :green_apple:
564 | :tangerine:
565 | :lemon:
566 | :cherries:
567 | :grapes:
568 | :watermelon:
569 | :strawberry:
570 | :peach:
571 | :melon:
572 | :banana:
573 | :pear:
574 | :pineapple:
575 | :sweet_potato:
576 | :eggplant:
577 | :tomato:
578 | :corn:
579 |
580 | Places
581 |
582 | :house:
583 | :house_with_garden:
584 | :school:
585 | :office:
586 | :post_office:
587 | :hospital:
588 | :bank:
589 | :convenience_store:
590 | :love_hotel:
591 | :hotel:
592 | :wedding:
593 | :church:
594 | :department_store:
595 | :european_post_office:
596 | :city_sunrise:
597 | :city_sunset:
598 | :japanese_castle:
599 | :european_castle:
600 | :tent:
601 | :factory:
602 | :tokyo_tower:
603 | :japan:
604 | :mount_fuji:
605 | :sunrise_over_mountains:
606 | :sunrise:
607 | :stars:
608 | :statue_of_liberty:
609 | :bridge_at_night:
610 | :carousel_horse:
611 | :rainbow:
612 | :ferris_wheel:
613 | :fountain:
614 | :roller_coaster:
615 | :ship:
616 | :speedboat:
617 | :boat:
618 | :sailboat:
619 | :rowboat:
620 | :anchor:
621 | :rocket:
622 | :airplane:
623 | :helicopter:
624 | :steam_locomotive:
625 | :tram:
626 | :mountain_railway:
627 | :bike:
628 | :aerial_tramway:
629 | :suspension_railway:
630 | :mountain_cableway:
631 | :tractor:
632 | :blue_car:
633 | :oncoming_automobile:
634 | :car:
635 | :red_car:
636 | :taxi:
637 | :oncoming_taxi:
638 | :articulated_lorry:
639 | :bus:
640 | :oncoming_bus:
641 | :rotating_light:
642 | :police_car:
643 | :oncoming_police_car:
644 | :fire_engine:
645 | :ambulance:
646 | :minibus:
647 | :truck:
648 | :train:
649 | :station:
650 | :train2:
651 | :bullettrain_front:
652 | :bullettrain_side:
653 | :light_rail:
654 | :monorail:
655 | :railway_car:
656 | :trolleybus:
657 | :ticket:
658 | :fuelpump:
659 | :vertical_traffic_light:
660 | :traffic_light:
661 | :warning:
662 | :construction:
663 | :beginner:
664 | :atm:
665 | :slot_machine:
666 | :busstop:
667 | :barber:
668 | :hotsprings:
669 | :checkered_flag:
670 | :crossed_flags:
671 | :izakaya_lantern:
672 | :moyai:
673 | :circus_tent:
674 | :performing_arts:
675 | :round_pushpin:
676 | :triangular_flag_on_post:
677 | :jp:
678 | :kr:
679 | :cn:
680 | :us:
681 | :fr:
682 | :es:
683 | :it:
684 | :ru:
685 | :gb:
686 | :uk:
687 | :de:
688 |
689 | Symbols
690 |
691 | :one:
692 | :two:
693 | :three:
694 | :four:
695 | :five:
696 | :six:
697 | :seven:
698 | :eight:
699 | :nine:
700 | :keycap_ten:
701 | :1234:
702 | :zero:
703 | :hash:
704 | :symbols:
705 | :arrow_backward:
706 | :arrow_down:
707 | :arrow_forward:
708 | :arrow_left:
709 | :capital_abcd:
710 | :abcd:
711 | :abc:
712 | :arrow_lower_left:
713 | :arrow_lower_right:
714 | :arrow_right:
715 | :arrow_up:
716 | :arrow_upper_left:
717 | :arrow_upper_right:
718 | :arrow_double_down:
719 | :arrow_double_up:
720 | :arrow_down_small:
721 | :arrow_heading_down:
722 | :arrow_heading_up:
723 | :leftwards_arrow_with_hook:
724 | :arrow_right_hook:
725 | :left_right_arrow:
726 | :arrow_up_down:
727 | :arrow_up_small:
728 | :arrows_clockwise:
729 | :arrows_counterclockwise:
730 | :rewind:
731 | :fast_forward:
732 | :information_source:
733 | :ok:
734 | :twisted_rightwards_arrows:
735 | :repeat:
736 | :repeat_one:
737 | :new:
738 | :top:
739 | :up:
740 | :cool:
741 | :free:
742 | :ng:
743 | :cinema:
744 | :koko:
745 | :signal_strength:
746 | :u5272:
747 | :u5408:
748 | :u55b6:
749 | :u6307:
750 | :u6708:
751 | :u6709:
752 | :u6e80:
753 | :u7121:
754 | :u7533:
755 | :u7a7a:
756 | :u7981:
757 | :sa:
758 | :restroom:
759 | :mens:
760 | :womens:
761 | :baby_symbol:
762 | :no_smoking:
763 | :parking:
764 | :wheelchair:
765 | :metro:
766 | :baggage_claim:
767 | :accept:
768 | :wc:
769 | :potable_water:
770 | :put_litter_in_its_place:
771 | :secret:
772 | :congratulations:
773 | :m:
774 | :passport_control:
775 | :left_luggage:
776 | :customs:
777 | :ideograph_advantage:
778 | :cl:
779 | :sos:
780 | :id:
781 | :no_entry_sign:
782 | :underage:
783 | :no_mobile_phones:
784 | :do_not_litter:
785 | :non-potable_water:
786 | :no_bicycles:
787 | :no_pedestrians:
788 | :children_crossing:
789 | :no_entry:
790 | :eight_spoked_asterisk:
791 | :sparkle:
792 | :eight_pointed_black_star:
793 | :heart_decoration:
794 | :vs:
795 | :vibration_mode:
796 | :mobile_phone_off:
797 | :chart:
798 | :currency_exchange:
799 | :aries:
800 | :taurus:
801 | :gemini:
802 | :cancer:
803 | :leo:
804 | :virgo:
805 | :libra:
806 | :scorpius:
807 | :sagittarius:
808 | :capricorn:
809 | :aquarius:
810 | :pisces:
811 | :ophiuchus:
812 | :six_pointed_star:
813 | :negative_squared_cross_mark:
814 | :a:
815 | :b:
816 | :ab:
817 | :o2:
818 | :diamond_shape_with_a_dot_inside:
819 | :recycle:
820 | :end:
821 | :back:
822 | :on:
823 | :soon:
824 | :clock1:
825 | :clock130:
826 | :clock10:
827 | :clock1030:
828 | :clock11:
829 | :clock1130:
830 | :clock12:
831 | :clock1230:
832 | :clock2:
833 | :clock230:
834 | :clock3:
835 | :clock330:
836 | :clock4:
837 | :clock430:
838 | :clock5:
839 | :clock530:
840 | :clock6:
841 | :clock630:
842 | :clock7:
843 | :clock730:
844 | :clock8:
845 | :clock830:
846 | :clock9:
847 | :clock930:
848 | :heavy_dollar_sign:
849 | :copyright:
850 | :registered:
851 | :tm:
852 | :x:
853 | :heavy_exclamation_mark:
854 | :bangbang:
855 | :interrobang:
856 | :o:
857 | :heavy_multiplication_x:
858 | :heavy_plus_sign:
859 | :heavy_minus_sign:
860 | :heavy_division_sign:
861 | :white_flower:
862 | :100:
863 | :heavy_check_mark:
864 | :ballot_box_with_check:
865 | :radio_button:
866 | :link:
867 | :curly_loop:
868 | :wavy_dash:
869 | :part_alternation_mark:
870 | :trident:
871 | :black_small_square:
872 | :white_small_square:
873 | :black_medium_small_square:
874 | :white_medium_small_square:
875 | :black_medium_square:
876 | :white_medium_square:
877 | :black_large_square:
878 | :white_large_square:
879 | :white_check_mark:
880 | :black_square_button:
881 | :white_square_button:
882 | :black_circle:
883 | :white_circle:
884 | :red_circle:
885 | :large_blue_circle:
886 | :large_blue_diamond:
887 | :large_orange_diamond:
888 | :small_blue_diamond:
889 | :small_orange_diamond:
890 | :small_red_triangle:
891 | :small_red_triangle_down:
892 | :shipit:
893 |
--------------------------------------------------------------------------------
/old/spider.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | import settings, requests, sys, traceback, json, os, time, random, csv, tqdm, threading
3 | import logging, datetime
4 | from lxml import etree
5 | import progress
6 | from selenium import webdriver, common
7 |
# m.weibo.cn endpoint templates; the `{}` slots are filled with str.format().
# INFO_URL: user id.
INFO_URL = 'https://m.weibo.cn/api/container/getIndex?type=uid&value={}'
# WEIBO_URL: user id, containerid of the weibo tab, page number.
WEIBO_URL = 'https://m.weibo.cn/api/container/getIndex?type=uid&value={}&containerid={}&page={}'
# LONG_WEIBO_URL: mid of the post whose full text is wanted.
LONG_WEIBO_URL = 'https://m.weibo.cn/statuses/extend?id={}'
COMMENT_URL = 'https://m.weibo.cn/api/comments/show?id={}&page={}'
# COMMENT_URL1: first comment page (user id, mid, max_id_type).
COMMENT_URL1 = 'https://m.weibo.cn/comments/hotflow?id={}&mid={}&max_id_type={}'
# COMMENT_URL2: following comment pages (user id, mid, max_id_type, max_id).
COMMENT_URL2 = 'https://m.weibo.cn/comments/hotflow?id={}&mid={}&max_id_type={}&max_id={}'

# Number of requests issued so far
num_requested = 0
# Users currently being crawled
crawling_user_ids = []
# Users waiting to be crawled
waiting_user_ids = []
# Users whose crawl has finished
crawled_user_ids = progress.crawled

# Index into settings.USER_PASSWORD of the account login() signs in with
current_user_index = 0
25 |
26 |
# http://nladuo.github.io/2018/12/08/那些年,我爬过的北科-六-——反反爬虫之js渲染
# Weibo comments appear to be protected by an anti-crawling mechanism, so a "headless browser" is used to imitate Chrome.
# For example this request works in Chrome, but a plain `requests` call gets the login HTML back:
# https://m.weibo.cn/comments/hotflow?id=1669879400&mid=4384122253963002&max_id_type=0&max_id=261294286701954
# It turned out that a Cookie is what is required; logging in inside the Chrome driver provides it (the second page and all later ones need the Cookie).
# selenium usage reference: https://selenium-python.readthedocs.io
driver = webdriver.Chrome(settings.CHROME_DRIVER_PATH)
logged = False
35 |
def login():
    """Open the Weibo sign-in page in the shared driver and pre-fill the
    credentials of the account selected by ``current_user_index``.

    The login button is deliberately not clicked: the user finishes the
    login by hand (a slider captcha may have to be solved) and confirms
    with Enter. Sets the module-level ``logged`` flag to ``True``.
    """
    # Without this declaration the assignment at the end would only create
    # a local variable and the module-level flag would stay False.
    global logged
    driver.get('https://passport.weibo.cn/signin/login')
    print('请进入 https://passport.weibo.cn/signin/login 后按 Enter 键继续')
    # input() waits for Enter on every platform; the previous
    # os.system("pause") only exists on Windows.
    input()
    account = settings.USER_PASSWORD[current_user_index]
    driver.find_element_by_id('loginName').send_keys(account['user'])
    driver.find_element_by_id('loginPassword').send_keys(account['password'])
    # Clicking automatically would be:
    # https://stackoverflow.com/questions/21350605/python-selenium-click-on-button
    print('密码已经填好,请完成登录之后按 Enter 继续(可能需要人工拖动滑块验证)')
    input()
    logged = True
47 |
48 | # See https://osf.io/upav8/
49 | # Line by line https://github.com/rkern/line_profiler
50 | import cProfile, pstats, io
def profile(fnc):
    """A decorator that uses cProfile to profile a function.

    Every call of the decorated function runs under a fresh
    ``cProfile.Profile``; the statistics, sorted by cumulative time, are
    written to the log with ``logging.info``. The return value (or the
    exception) of the wrapped function is passed through unchanged.
    """
    import functools

    @functools.wraps(fnc)  # keep the wrapped function's __name__ / __doc__
    def inner(*args, **kwargs):
        pr = cProfile.Profile()
        pr.enable()
        try:
            return fnc(*args, **kwargs)
        finally:
            # Always stop the profiler and emit the report, even when the
            # profiled function raises.
            pr.disable()
            s = io.StringIO()
            ps = pstats.Stats(pr, stream=s).sort_stats('cumulative')
            ps.print_stats()
            logging.info(s.getvalue())

    return inner
69 |
70 | class WBSpider():
@staticmethod
def spide(user_id=1669879400):
    """
    Crawl the user ``user_id`` (any valid user id) on a new thread.

    A WBSpider instance is created for the user with picture downloading
    switched off (0 = do not download original pictures, 1 = download) and
    its ``start`` method is run in a freshly started thread.
    """
    spider = WBSpider(user_id, 0)
    worker = threading.Thread(target=spider.start)
    worker.start()
def __init__(self, user_id, pic_download=0):
    """Initialise the spider for one user.

    Args:
        user_id: numeric id of the user to crawl, e.g. 1669879400; must be
            an ``int``, otherwise the process exits with a message.
        pic_download: 0 (default) to skip original pictures, 1 to download
            them; any other value exits the process with a message.
    """
    if not isinstance(user_id, int):
        sys.exit(u"user_id值应为一串数字形式,请重新输入")
    if not (pic_download == 0 or pic_download == 1):
        sys.exit(u"pic_download值应为0或1,请重新输入")

    # Constructor arguments
    self.user_id = user_id
    self.pic_download = pic_download

    # Filled in from the fetched user info
    self.userInfo = None
    self.weobo_containerid = None
    self.total_pages = 0

    # Collected posts and the number collected so far
    self.all_cards = []
    self.got_num = 0

    # Collected comments, their count, and the mids already handled
    self.all_comments = []
    self.comments_got_num = 0
    self.got_comments_mids = []
def request_data(self, url):
    """Load ``url`` in the shared browser and return the ``data`` member
    of the JSON document it displays.

    The JSON text is read from the page's ``<pre>`` element. When it cannot
    be read, the page is assumed to be an error or login page: if the
    rate-limit element is present the next account in
    ``settings.USER_PASSWORD`` is selected, then ``login()`` is called and
    the request is retried.

    After every ``settings.WAITING_FOR_REQUESTS`` requests the method
    sleeps for ``settings.DELAY`` seconds.

    Returns:
        The value stored under ``"data"`` in the response, or ``None`` when
        the response has no such key. A response may also look like
        ``{'ok': 0, 'msg': '这里还没有内容', 'data': {'cards': []}}``.
    """
    global current_user_index, num_requested

    logging.info(f'Requesting {url}...')
    driver.get(url)
    try:
        json_str = driver.find_element_by_css_selector("pre").text
    except Exception:
        # Exception rather than a bare except, so KeyboardInterrupt and
        # SystemExit are not turned into an endless login/retry loop.
        logging.info(f'{url} 的请求结果无法转为 json:{driver.page_source}')
        try:
            driver.find_element_by_xpath("//body/div/p[@class='h5-4con']")
            print('请求过于频繁,将切换当前使用的用户重新登录...')
            current_user_index = (current_user_index + 1) % len(settings.USER_PASSWORD)
            login()
        except common.exceptions.NoSuchElementException:
            # Not rate limited: assume for now that the login is missing.
            login()
        return self.request_data(url)

    # json.loads() no longer accepts the `encoding` keyword (removed in
    # Python 3.9); json_str is already a str, so no decoding is involved.
    res_obj = json.loads(json_str)

    num_requested += 1
    if num_requested % settings.WAITING_FOR_REQUESTS == 0:
        logging.info(f'当前已经请求 {num_requested} 次,等待 {settings.DELAY} 秒')
        time.sleep(settings.DELAY)

    # .get() so that a response without "data" yields None, which callers
    # such as get_comments() explicitly check for.
    return res_obj.get("data")
161 |
def save_json(self, data, type='userinfo.json'):
    """Write ``data`` as pretty-printed (indent 2), non-ASCII-escaped JSON
    to the file that ``self.get_filepath(type)`` points at, overwriting it.
    """
    with open(self.get_filepath(type), 'w', encoding='utf-8') as sink:
        sink.write(json.dumps(data, ensure_ascii=False, indent=2))
166 |
def start(self):
    """Run the crawler for ``self.user_id``.

    The user is rejected when it is already being crawled, already crawled
    or already waiting. Otherwise it enters the crawling pool, or the
    waiting pool when ``settings.MAX_CRAWING_USERS`` users are already
    being crawled (in which case nothing else happens now).

    For a user in the crawling pool the user info and all posts are
    fetched; on success the user is moved to the crawled list, the progress
    is written to ``./progress.py`` and, if any user is waiting, one of
    them is started. Any exception is logged and swallowed.
    """
    try:
        if self.user_id in crawling_user_ids:
            logging.warning(f'uid {self.user_id} 正在爬取,拒绝加入到爬取池')
            return
        elif self.user_id in crawled_user_ids:
            logging.warning(f'uid {self.user_id} 已经爬取完毕,拒绝加入到爬取池')
            return
        elif self.user_id in waiting_user_ids:
            logging.warning(f'uid {self.user_id} 正在等待被抓取,拒绝加入到爬取池')
            return
        elif len(crawling_user_ids) < settings.MAX_CRAWING_USERS:
            crawling_user_ids.append(self.user_id)
            logging.info(f'uid {self.user_id} 加入到爬取池')
        else:
            waiting_user_ids.append(self.user_id)
            logging.info(f'uid {self.user_id} 加入到等待池')
            return
        if not self.get_user_info():
            logging.error('获取用户信息中断')
            return
        if not self.get_all_weibo():
            logging.error('获取所有微博中断')
            return
        crawling_user_ids.remove(self.user_id)
        crawled_user_ids.append(self.user_id)
        # Persist the progress so a later run does not crawl the same users again.
        with open('./progress.py', 'w', encoding='UTF-8') as crawled_py:
            crawled_py.write(f'crawled = {crawled_user_ids}\n')
            crawled_py.write(f'crawling = {crawling_user_ids}\n')
        logging.info(f"【uid: {self.user_id}, 昵称: {self.userInfo['screen_name']}】爬取完毕,已经从爬取池中删除")
        if len(waiting_user_ids) > 0:
            wuid = waiting_user_ids.pop()
            # Log the user that was actually taken out of the waiting pool
            # (this used to print the id of the user that just finished).
            logging.info(f"uid {wuid} 被取出等待池中")
            WBSpider.spide(wuid)
        logging.info("*" * 100)
        if self.pic_download == 1:
            self.download_pictures()
    except Exception as e:
        logging.error(str(e))
        traceback.print_exc()
209 |
def get_user_info(self):
    """Fetch the profile of ``self.user_id``.

    Stores the ``userInfo`` member and the containerid of the second tab
    on the instance and saves the whole response through ``save_json``.

    Returns ``True`` on success; on any exception the error is logged, the
    traceback printed and ``None`` is returned implicitly.
    """
    try:
        payload = self.request_data(INFO_URL.format(self.user_id))
        self.userInfo = payload["userInfo"]
        self.weobo_containerid = payload["tabsInfo"]["tabs"][1]["containerid"]
        self.save_json(payload)
    except Exception as err:
        logging.error(str(err))
        traceback.print_exc()
    else:
        return True
222 |
def get_comments(self, mid, max_num=200):
    """
    Collect the comments of the post ``mid`` and write them to
    ``<mid>.comments.csv``.

    Args:
        mid: id of the post whose comments are collected.
        max_num: stop once this many comments were collected; many
            comments are meaningless repeats, so 200 are enough by default.

    Every commenter is handed to ``WBSpider.spide`` so that it is crawled
    as well. A post whose comments were already collected by this instance
    is skipped.
    """
    if mid in self.got_comments_mids:
        logging.info(f'mid={mid} 的评论已经收集好了,不再收集')
        return
    csv_name = f"{mid}.comments.csv"
    wrote_num = 0
    page = 0
    # The first page is requested without max_id; its comments are kept too
    # (they used to be fetched and then thrown away).
    url = COMMENT_URL1.format(self.user_id, mid, 0)
    while True:
        page += 1
        data = self.request_data(url)
        if data is None:
            break
        max_id = data["max_id"]
        max_id_type = data["max_id_type"]
        for comment in data["data"]:
            selector = etree.HTML(comment["text"])
            self.all_comments.append({
                'mid': mid,
                'id': comment["id"],
                'created_at': comment["created_at"],
                'user_id': comment["user"]["id"],
                'text': etree.tostring(selector, method="text", encoding="UTF-8").decode('utf-8'),
                'img_emoji': selector.xpath("//span/img/@alt"),
            })
            self.comments_got_num += 1

            # Add the commenter to the crawl pool as well. The call was
            # missing its parentheses and arguments, so nothing happened.
            uid = comment["user"]["id"]
            uname = comment["user"]["screen_name"]
            logging.info(f'尝试抓取评论者 {uid}, {uname}')
            WBSpider.spide(user_id=uid)

        # Flush to the file every 20 pages, if there is anything new.
        if page % 20 == 0 and self.comments_got_num > wrote_num:
            self.write_comment_csv(wrote_num, type=csv_name)
            wrote_num = self.comments_got_num

        # Enough comments collected.
        if self.comments_got_num >= max_num:
            break
        # Observed end-of-comments condition.
        if max_id == 0 and max_id_type == 0:
            break
        url = COMMENT_URL2.format(self.user_id, mid, max_id_type, max_id)

    # Write whatever was collected since the last 20-page flush.
    self.write_comment_csv(wrote_num, type=csv_name)
    logging.info(f"用户 {self.user_id} 的微博 {mid} 共爬取" + str(self.comments_got_num) + u"条评论")
    # Remember the post so the check at the top can actually skip it.
    self.got_comments_mids.append(mid)
    self.all_comments = []
    self.comments_got_num = 0
274 |
def get_all_weibo(self):
    """Fetch all original posts of ``self.user_id`` together with their
    comments and write the posts to the user's csv file.

    The page count is derived from the user's ``statuses_count`` at 10
    posts per page. Retweets are not stored; the retweeted user is handed
    to ``WBSpider.spide`` instead. Posts truncated with a "全文" link are
    replaced by their full text when it can be fetched.

    Returns ``True`` on success; on any exception the error is logged, the
    traceback printed and ``None`` is returned implicitly.
    """
    try:
        url = WEIBO_URL.format(self.user_id, self.weobo_containerid, 1)
        data = self.request_data(url)
        self.total_pages = int(self.userInfo["statuses_count"]/10)+1
        self.save_json(data, type='cards1.json')

        wrote_num = 0
        for i in tqdm.tqdm(range(self.total_pages), desc=f"【uid: {self.user_id}, 昵称: {self.userInfo['screen_name']}】爬取进度"):
            logging.info(f"【uid: {self.user_id}, 昵称: {self.userInfo['screen_name']}】爬取进度:{i+1}/{self.total_pages}")
            page = i+1
            url = WEIBO_URL.format(self.user_id, self.weobo_containerid, page)
            data = self.request_data(url)
            for card in data['cards']:
                # Skip ads and other non-post cards.
                if card["card_type"] != 9:
                    continue
                mblog = card["mblog"]
                # For a retweet, crawl the retweeted user instead.
                if "retweeted_status" in mblog:
                    uid = mblog["retweeted_status"]["user"]["id"]
                    uname = mblog["retweeted_status"]["user"]["screen_name"]
                    logging.info(f'尝试抓取被转发者 {uid}, {uname}')
                    WBSpider.spide(user_id=uid)
                    continue
                selector = etree.HTML(mblog["text"])
                a_text = selector.xpath("//a/text()")
                if u"全文" in a_text:
                    # Fetch the full text once (it used to be requested twice).
                    long_text = self.get_long_weibo(mblog["mid"])
                    if long_text is None:
                        logging.error(f'无法获取全文,uid: {self.user_id},页数: {page},微博:{mblog["mid"]}')
                        logging.error(f'其短文为 {mblog["text"]}')
                    else:
                        mblog["text"] = long_text
                        selector = etree.HTML(mblog["text"])
                # Convert the HTML body to plain text.
                # See https://www.zybuluo.com/Alston/note/778377
                mblog["img_emoji"] = selector.xpath("//span/img/@alt")
                mblog["text"] = etree.tostring(selector, method="text", encoding="UTF-8").decode('utf-8')
                self.all_cards.append(card)
                self.got_num += 1
                # Crawl the comments of this post.
                self.get_comments(mblog["mid"])

            # Flush to the file every 20 pages, if there is anything new.
            if page % 20 == 0:
                if self.got_num > wrote_num:
                    self.write_csv(wrote_num)
                    wrote_num = self.got_num

        # Write whatever was collected since the last 20-page flush.
        self.write_csv(wrote_num)
        # self.user_id: the bare name `user_id` is undefined here and used
        # to raise NameError, turning every successful crawl into a failure.
        logging.info(f"用户 {self.user_id} 共爬取" + str(self.got_num) + u"条原创微博")
        return True
    except Exception as e:
        logging.error(str(e))
        traceback.print_exc()
332 |
def get_long_weibo(self, mid):
    """Fetch the full text of the long post ``mid``.

    Returns the ``longTextContent`` of the response; on any exception the
    error is logged, the traceback printed and ``None`` is returned
    implicitly.
    """
    try:
        return self.request_data(LONG_WEIBO_URL.format(mid))["longTextContent"]
    except Exception as err:
        logging.error(str(err))
        traceback.print_exc()
343 |
def write_comment_csv(self, wrote_num, type):
    """Append the comments that have not been written yet to the result file.

    Args:
        wrote_num: Number of comments already written. Only entries of
            ``self.all_comments`` from this index onwards are appended, and
            the header row is emitted only when this is 0.
        type: File extension handed to ``self.get_filepath`` (the name
            shadows the builtin but is kept for caller compatibility).

    Any exception is logged and its traceback printed; nothing is raised.
    """
    try:
        result_headers = [
            "mid",
            "评论 id",
            "评论人 id",
            "评论时间",
            "评论内容",
            "包含图片表情"
        ]
        columns = ("mid", "id", "user_id", "created_at", "text", "img_emoji")
        # Slice first so rows are built only for the new comments; rebuilding
        # every row on each flush made the whole crawl quadratic.
        result_data = [
            [comment[column] for column in columns]
            for comment in self.all_comments[wrote_num:]
        ]
        # Resolve the path once: get_filepath also creates directories.
        file_path = self.get_filepath(type)
        with open(file_path, "a", encoding="utf-8", newline="") as f:
            writer = csv.writer(f)
            if wrote_num == 0:
                writer.writerow(result_headers)
            writer.writerows(result_data)
        # Explicit check instead of `assert`, which disappears under -O and
        # produced an empty log message when swallowed by the handler below.
        if self.comments_got_num != len(self.all_comments):
            logging.error(
                "comment counter mismatch: comments_got_num=%s, collected=%s",
                self.comments_got_num,
                len(self.all_comments),
            )
            return
        print(u"%d条评论写入csv文件完毕,保存路径:" % self.comments_got_num)
        print(file_path)
    except Exception as e:
        logging.error(str(e))
        traceback.print_exc()
370 |
def write_csv(self, wrote_num):
    """Append the weibo cards that have not been written yet to the CSV file.

    Args:
        wrote_num: Number of cards already written. Only entries of
            ``self.all_cards`` from this index onwards are appended, and the
            header row is emitted only when this is 0.

    Any exception is logged and its traceback printed; nothing is raised.
    """
    try:
        result_headers = [
            "mid",
            "发布时间",
            "微博内容",
            "包含图片表情"
        ]
        columns = ("mid", "created_at", "text", "img_emoji")
        # Slice first so rows are built only for the new cards; rebuilding
        # every row on each flush made the whole crawl quadratic.
        result_data = [
            [card["mblog"][column] for column in columns]
            for card in self.all_cards[wrote_num:]
        ]
        # Resolve the path once: get_filepath also creates directories.
        file_path = self.get_filepath("csv")
        with open(file_path, "a", encoding="utf-8", newline="") as f:
            writer = csv.writer(f)
            if wrote_num == 0:
                writer.writerow(result_headers)
            writer.writerows(result_data)
        # Explicit check instead of `assert`, which disappears under -O and
        # produced an empty log message when swallowed by the handler below.
        if self.got_num != len(self.all_cards):
            logging.error(
                "weibo counter mismatch: got_num=%s, collected=%s",
                self.got_num,
                len(self.all_cards),
            )
            return
        print(u"%d条微博写入csv文件完毕,保存路径:" % self.got_num)
        print(file_path)
    except Exception as e:
        logging.error(str(e))
        traceback.print_exc()
395 |
def download_pictures(self):
    """Placeholder for picture downloading; currently does nothing."""
    return None
398 |
def get_filepath(self, type):
    """Return the output location for the current user, creating directories.

    Args:
        type: ``"img"`` to get the user's image directory; any other value is
            used as the file extension of ``<user_id>.<type>`` inside the
            user's directory.

    Returns:
        The directory path (for ``"img"``) or the file path, or ``None`` when
        an error occurs (it is logged and its traceback printed).
    """
    try:
        wants_image_dir = type == "img"
        # NOTE(review): '__file__' is a quoted literal, so realpath resolves
        # it against the current working directory, not this module's path.
        segments = [
            os.path.dirname(os.path.realpath('__file__')),
            "weibo",
            self.userInfo["screen_name"],
        ]
        if wants_image_dir:
            # Images live in an 'img' sub-directory of the user's folder.
            segments.append("img")
        target_dir = os.sep.join(segments)
        # Create the directory on first use.
        if not os.path.isdir(target_dir):
            os.makedirs(target_dir)
        if wants_image_dir:
            return target_dir
        # Every other type maps to <user_id>.<type> inside the user's folder.
        return os.sep.join([target_dir, "%d" % self.user_id + "." + type])
    except Exception as err:
        logging.error(str(err))
        traceback.print_exc()
        return None
417 |
def init_logging(name='testname', log_level=logging.INFO):
    """Route the root logger to a fresh log file and return its handler.

    The file is ``log/<name>-<LEVEL>.log``, opened in write mode with UTF-8
    encoding. Every handler previously attached to the root logger is removed
    and closed, so repeated calls do not leak open log files.

    Args:
        name: Base name of the log file.
        log_level: Level set on the root logger; its name is also embedded in
            the file name.

    Returns:
        The ``logging.FileHandler`` now attached to the root logger.
    """
    # NOTE(review): '__file__' is a quoted literal, so the "log" directory is
    # resolved against the current working directory.
    file_dir = os.path.dirname(os.path.realpath('__file__')) + "/log"
    # exist_ok avoids the check-then-create race of isdir() + makedirs().
    os.makedirs(file_dir, exist_ok=True)
    fileh = logging.FileHandler(
        file_dir + f'/{name}-{logging.getLevelName(log_level)}.log',
        'w',
        encoding='utf-8',
    )
    formatter = logging.Formatter("%(asctime)s;%(levelname)s;%(message)s",
                                  "%Y-%m-%d %H:%M:%S")
    fileh.setFormatter(formatter)

    log = logging.getLogger()  # root logger
    for hdlr in log.handlers[:]:  # iterate over a copy while removing
        log.removeHandler(hdlr)
        # Release the old handler's resources (e.g. its open log file).
        hdlr.close()
    log.addHandler(fileh)  # install the new handler
    log.setLevel(log_level)

    return fileh
435 |
if __name__ == "__main__":
    # Name the log file after the start time of this run.
    init_logging(name=f"{datetime.datetime.now():%Y-%m-%d %H-%M-%S}")
    login()
    # NOTE(review): keep the loop variable named `user_id` at module level;
    # the crawl method's final log line appears to read it as a global.
    # Verify before renaming or moving this loop into a function.
    for user_id in progress.crawling:
        WBSpider.spide(user_id)
441 |
--------------------------------------------------------------------------------
/src/files/imgEmoji.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
![[微笑]](https://h5.sinaimg.cn/m/emoticon/icon/default/d_hehe-039d0a6a8a.png)
![[可爱]](https://h5.sinaimg.cn/m/emoticon/icon/default/d_keai-7a5bf88086.png)
![[太开心]](https://h5.sinaimg.cn/m/emoticon/icon/default/d_taikaixin-97bd3f82d6.png)
![[鼓掌]](https://h5.sinaimg.cn/m/emoticon/icon/default/d_guzhang-a35dfd4d70.png)
![[嘻嘻]](https://h5.sinaimg.cn/m/emoticon/icon/default/d_xixi-813ededea2.png)
![[哈哈]](https://h5.sinaimg.cn/m/emoticon/icon/default/d_haha-dd1c6d36cf.png)
![[笑cry]](https://h5.sinaimg.cn/m/emoticon/icon/default/d_xiaoku-d320324f00.png)
![[挤眼]](https://h5.sinaimg.cn/m/emoticon/icon/default/d_jiyan-feeb0a726c.png)
![[馋嘴]](https://h5.sinaimg.cn/m/emoticon/icon/default/d_chanzui-01ee2388fd.png)
![[黑线]](https://h5.sinaimg.cn/m/emoticon/icon/default/d_heixian-bde08b426c.png)
![[汗]](https://h5.sinaimg.cn/m/emoticon/icon/default/d_han-0e7b8aa6d1.png)
![[挖鼻]](https://h5.sinaimg.cn/m/emoticon/icon/default/d_wabishi-842338e697.png)
![[哼]](https://h5.sinaimg.cn/m/emoticon/icon/default/d_heng-fe4c7da3e7.png)
![[怒]](https://h5.sinaimg.cn/m/emoticon/icon/default/d_nu-54e54e160b.png)
![[委屈]](https://h5.sinaimg.cn/m/emoticon/icon/default/d_weiqu-b069337758.png)
![[可怜]](https://h5.sinaimg.cn/m/emoticon/icon/default/d_kelian-3e00ccdc26.png)
![[失望]](https://h5.sinaimg.cn/m/emoticon/icon/default/d_shiwang-7925938d93.png)
![[悲伤]](https://h5.sinaimg.cn/m/emoticon/icon/default/d_beishang-f8d6de06c8.png)
![[泪]](https://h5.sinaimg.cn/m/emoticon/icon/default/d_lei-1b4b02f8b1.png)
![[允悲]](https://h5.sinaimg.cn/m/emoticon/icon/default/d_yunbei-9aa3c436a4.png)
![[害羞]](https://h5.sinaimg.cn/m/emoticon/icon/default/d_haixiu-1c4cb9b053.png)
![[污]](https://h5.sinaimg.cn/m/emoticon/icon/default/d_wu-12f8564d2b.png)
![[爱你]](https://h5.sinaimg.cn/m/emoticon/icon/default/d_aini-4a23c0524a.png)
![[亲亲]](https://h5.sinaimg.cn/m/emoticon/icon/default/d_qinqin-ec0877767a.png)
![[色]](https://h5.sinaimg.cn/m/emoticon/icon/default/d_huaxin-36e1b80629.png)
![[舔屏]](https://h5.sinaimg.cn/m/emoticon/icon/default/d_tian-fe397541e6.png)
![[憧憬]](https://h5.sinaimg.cn/m/emoticon/icon/default/d_xingxingyan-06a3ca0ae4.png)
![[doge]](https://h5.sinaimg.cn/m/emoticon/icon/others/d_doge-861403219c.png)
![[喵喵]](https://h5.sinaimg.cn/m/emoticon/icon/others/d_miao-61fe2a7aaa.png)
![[二哈]](https://h5.sinaimg.cn/m/emoticon/icon/others/d_erha-0fecc90ac1.png)
![[坏笑]](https://h5.sinaimg.cn/m/emoticon/icon/default/d_huaixiao-b1c3a99d55.png)
![[阴险]](https://h5.sinaimg.cn/m/emoticon/icon/default/d_yinxian-31824f8e19.png)
![[笑而不语]](https://h5.sinaimg.cn/m/emoticon/icon/default/d_heiheihei-f7ca09d6e8.png)
![[偷笑]](https://h5.sinaimg.cn/m/emoticon/icon/default/d_touxiao-15afb1c739.png)
![[酷]](https://h5.sinaimg.cn/m/emoticon/icon/default/d_ku-6fa1a42f7b.png)
![[并不简单]](https://h5.sinaimg.cn/m/emoticon/icon/default/d_bingbujiandan-e0c6936005.png)
![[思考]](https://h5.sinaimg.cn/m/emoticon/icon/default/d_sikao-c599fd085f.png)
![[疑问]](https://h5.sinaimg.cn/m/emoticon/icon/default/d_yiwen-40a816d206.png)
![[费解]](https://h5.sinaimg.cn/m/emoticon/icon/default/d_feijie-bac8e40ab4.png)
![[晕]](https://h5.sinaimg.cn/m/emoticon/icon/default/d_yun-8746541994.png)
![[衰]](https://h5.sinaimg.cn/m/emoticon/icon/default/d_shuai-3dcd4d86c5.png)
![[骷髅]](https://h5.sinaimg.cn/m/emoticon/icon/default/d_kulou-8416b2c6bc.png)
![[嘘]](https://h5.sinaimg.cn/m/emoticon/icon/default/d_xu-1ad70f0070.png)
![[闭嘴]](https://h5.sinaimg.cn/m/emoticon/icon/default/d_bizui-351f5d7ae8.png)
![[傻眼]](https://h5.sinaimg.cn/m/emoticon/icon/default/d_shayan-c1a5f8fbc5.png)
![[吃惊]](https://h5.sinaimg.cn/m/emoticon/icon/default/d_chijing-e806473437.png)
![[吐]](https://h5.sinaimg.cn/m/emoticon/icon/default/d_tu-b5c18d9140.png)
![[感冒]](https://h5.sinaimg.cn/m/emoticon/icon/default/d_ganmao-d66b25d11a.png)
![[生病]](https://h5.sinaimg.cn/m/emoticon/icon/default/d_shengbing-9098a7d928.png)
![[拜拜]](https://h5.sinaimg.cn/m/emoticon/icon/default/d_baibai-71b47dffdc.png)
![[鄙视]](https://h5.sinaimg.cn/m/emoticon/icon/default/d_bishi-34f7294c90.png)
[左哼哼]![[右哼哼]](https://h5.sinaimg.cn/m/emoticon/icon/default/d_zuohengheng-837f5b098f.png)
![[抓狂]](https://h5.sinaimg.cn/m/emoticon/icon/default/d_zhuakuang-fd744b17b5.png)
![[怒骂]](https://h5.sinaimg.cn/m/emoticon/icon/default/d_numa-8965c9df78.png)
![[打脸]](https://h5.sinaimg.cn/m/emoticon/icon/default/d_dalian-2567443949.png)
![[顶]](https://h5.sinaimg.cn/m/emoticon/icon/default/d_ding-e7a0057a7d.png)
![[互粉]](https://h5.sinaimg.cn/m/emoticon/icon/default/f_hufen-848e342fc6.png)
![[钱]](https://h5.sinaimg.cn/m/emoticon/icon/default/d_qian-342451ec9d.png)
![[哈欠]](https://h5.sinaimg.cn/m/emoticon/icon/default/d_dahaqi-470c302c19.png)
![[困]](https://h5.sinaimg.cn/m/emoticon/icon/default/d_kun-a593cf62c9.png)
![[睡]](https://h5.sinaimg.cn/m/emoticon/icon/default/d_shuijiao-6feb2f1452.png)
![[吃瓜]](https://h5.sinaimg.cn/m/emoticon/icon/default/d_chigua-c95cd5ba58.png)
![[抱抱]](https://h5.sinaimg.cn/m/emoticon/icon/default/d_baobao-b928ba5761.png)
![[摊手]](https://h5.sinaimg.cn/m/emoticon/icon/default/d_tanshou-3abaa4ed77.png)
![[跪了]](https://h5.sinaimg.cn/m/emoticon/icon/default/d_guile-7b3e474f7f.png)
![[心]](https://h5.sinaimg.cn/m/emoticon/icon/others/l_xin-6912791858.png)
![[伤心]](https://h5.sinaimg.cn/m/emoticon/icon/others/l_shangxin-934f730572.png)
![[鲜花]](https://h5.sinaimg.cn/m/emoticon/icon/others/w_xianhua-6efa26efdf.png)
![[男孩儿]](https://h5.sinaimg.cn/m/emoticon/icon/others/d_nanhaier-27416c66bb.png)
![[女孩儿]](https://h5.sinaimg.cn/m/emoticon/icon/others/d_nvhaier-4dac6de6ba.png)
![[握手]](https://h5.sinaimg.cn/m/emoticon/icon/others/h_woshou-5f420e76e3.png)
![[作揖]](https://h5.sinaimg.cn/m/emoticon/icon/others/h_zuoyi-cb12e18fd5.png)
![[赞]](https://h5.sinaimg.cn/m/emoticon/icon/others/h_zan-e3d1e596da.png)
![[耶]](https://h5.sinaimg.cn/m/emoticon/icon/others/h_ye-256191c090.png)
![[good]](https://h5.sinaimg.cn/m/emoticon/icon/others/h_good-644bcfa993.png)
![[弱]](https://h5.sinaimg.cn/m/emoticon/icon/others/h_ruo-89790b4f76.png)
![[NO]](https://h5.sinaimg.cn/m/emoticon/icon/others/h_buyao-cf20be4de3.png)
![[ok]](https://h5.sinaimg.cn/m/emoticon/icon/others/h_ok-4bd9b83e8e.png)
![[haha]](https://h5.sinaimg.cn/m/emoticon/icon/others/h_haha-62cacecd90.png)
![[来]](https://h5.sinaimg.cn/m/emoticon/icon/others/h_lai-7146755d29.png)
[加油]![[熊猫]](https://h5.sinaimg.cn/m/emoticon/icon/others/d_xiongmao-d85c63c04f.png)
![[兔子]](https://h5.sinaimg.cn/m/emoticon/icon/others/d_tuzi-ff7bf5a0f1.png)
![[猪头]](https://h5.sinaimg.cn/m/emoticon/icon/others/d_zhutou-c6a28f8e09.png)
![[草泥马]](https://h5.sinaimg.cn/m/emoticon/icon/others/d_shenshou-3d84d102d9.png)
![[奥特曼]](https://h5.sinaimg.cn/m/emoticon/icon/others/d_aoteman-f5e9d757b1.png)
![[太阳]](https://h5.sinaimg.cn/m/emoticon/icon/others/w_taiyang-90b439dadf.png)
![[月亮]](https://h5.sinaimg.cn/m/emoticon/icon/others/w_yueliang-7b19f500d5.png)
![[浮云]](https://h5.sinaimg.cn/m/emoticon/icon/others/w_fuyun-c532bc5113.png)
![[下雨]](https://h5.sinaimg.cn/m/emoticon/icon/others/w_xiayu-696d2a2be4.png)
![[沙尘暴]](https://h5.sinaimg.cn/m/emoticon/icon/others/w_shachenbao-f5b561e951.png)
![[微风]](https://h5.sinaimg.cn/m/emoticon/icon/others/w_weifeng-33be96b05d.png)
![[围观]](https://h5.sinaimg.cn/m/emoticon/icon/others/o_weiguan-182f18c20e.png)
![[飞机]](https://h5.sinaimg.cn/m/emoticon/icon/others/o_feiji-d02d2841f1.png)
![[照相机]](https://h5.sinaimg.cn/m/emoticon/icon/others/o_zhaoxiangji-58853b2884.png)
![[话筒]](https://h5.sinaimg.cn/m/emoticon/icon/others/o_huatong-a3c5f9bcc2.png)
![[音乐]](https://h5.sinaimg.cn/m/emoticon/icon/others/o_yinyue-89aa67be16.png)
![[蜡烛]](https://h5.sinaimg.cn/m/emoticon/icon/others/o_lazhu-6209fc6e73.png)
![[喜]](https://h5.sinaimg.cn/m/emoticon/icon/others/f_xi-aa0f9e7690.png)
![[给力]](https://h5.sinaimg.cn/m/emoticon/icon/others/f_geili-7e99751314.png)
![[威武]](https://h5.sinaimg.cn/m/emoticon/icon/others/f_v5-c583bc351a.png)
![[干杯]](https://h5.sinaimg.cn/m/emoticon/icon/others/o_ganbei-7c906a72ba.png)
![[蛋糕]](https://h5.sinaimg.cn/m/emoticon/icon/others/o_dangao-57caf5f65f.png)
![[礼物]](https://h5.sinaimg.cn/m/emoticon/icon/others/o_liwu-cd692ebd14.png)
![[钟]](https://h5.sinaimg.cn/m/emoticon/icon/others/o_zhong-cd59210f5a.png)
![[肥皂]](https://h5.sinaimg.cn/m/emoticon/icon/others/d_feizao-74763d4577.png)
![[绿丝带]](https://h5.sinaimg.cn/m/emoticon/icon/others/o_lvsidai-98e9d0748c.png)
![[围脖]](https://h5.sinaimg.cn/m/emoticon/icon/others/o_weibo-1f16a3b65b.png)
→_→![[羞嗒嗒]](https://h5.sinaimg.cn/m/emoticon/icon/lxh/lxh_xiudada-f44e8f5688.png)
![[好爱哦]](https://h5.sinaimg.cn/m/emoticon/icon/lxh/lxh_haoaio-93f6657665.png)
![[偷乐]](https://h5.sinaimg.cn/m/emoticon/icon/lxh/lxh_toule-d37aa91a7d.png)
![[赞啊]](https://h5.sinaimg.cn/m/emoticon/icon/lxh/lxh_zana-9c9fa46102.png)
![[笑哈哈]](https://h5.sinaimg.cn/m/emoticon/icon/lxh/lxh_xiaohaha-1bfd4648b7.png)
![[好喜欢]](https://h5.sinaimg.cn/m/emoticon/icon/lxh/lxh_haoxihuan-8b4400e556.png)
![[求关注]](https://h5.sinaimg.cn/m/emoticon/icon/lxh/lxh_qiuguanzhu-9953fab258.png)
🙃🤓🤗😱👿👻💩
🙈🙉🙊🙅🙋👏🙏
🎄⚡🍉🍗💊💣
![[锦鲤]](https://h5.sinaimg.cn/m/emoticon/icon/others/h_jinlihongbao-7048556552.png)
[带着微博去旅行]![[给你小心心]](https://h5.sinaimg.cn/m/emoticon/icon/others/qixi2018_xiaoxinxin-afb8c6ea83.png)
![[酸]](https://h5.sinaimg.cn/m/emoticon/icon/others/h_suan-a1ba7ede34.png)
![[佩奇]](https://h5.sinaimg.cn/m/emoticon/icon/others/a_peiqi-f1d74484da.png)
![[米奇比心]](https://h5.sinaimg.cn/m/emoticon/icon/movies/mickey_01-f93c4eac57.png)
![[米妮爱你]](https://h5.sinaimg.cn/m/emoticon/icon/movies/mickey_07-08d307f244.png)
![[米妮开心]](https://h5.sinaimg.cn/m/emoticon/icon/movies/mickey_08-b666291943.png)
[胖丁微笑]![[哆啦A梦吃惊]](https://h5.sinaimg.cn/m/emoticon/icon/doraemon/dr_chijing-1b0e255e0f.png)
![[哆啦A梦微笑]](https://h5.sinaimg.cn/m/emoticon/icon/doraemon/dr_weixiao-c7ecdd25aa.png)
![[钢铁侠]](https://h5.sinaimg.cn/m/emoticon/icon/movies/avengers_ironman-76f12d71df.png)
![[美国队长]](https://h5.sinaimg.cn/m/emoticon/icon/movies/avengers_captainamerica-395be15d68.png)
![[浩克]](https://h5.sinaimg.cn/m/emoticon/icon/movies/avengers_hulk-bb3ebb9a03.png)
![[雷神]](https://h5.sinaimg.cn/m/emoticon/icon/movies/avengers_thor-61cea97153.png)
![[黑寡妇]](https://h5.sinaimg.cn/m/emoticon/icon/movies/avengers_blackwidow-906ae1533a.png)
![[鹰眼]](https://h5.sinaimg.cn/m/emoticon/icon/movies/avengers_eagleeye-c2aab470e5.png)
![[惊奇队长]](https://h5.sinaimg.cn/m/emoticon/icon/movies/marvel_captain-4dfb3b2fc1.png)
![[奥克耶]](https://h5.sinaimg.cn/m/emoticon/icon/movies/avengers_okoye-859b6a4924.png)
![[蚁人]](https://h5.sinaimg.cn/m/emoticon/icon/movies/marvel_yiren-0442d6e903.png)
![[灭霸]](https://h5.sinaimg.cn/m/emoticon/icon/movies/avengers_thanos-05b82cd671.png)
![[蜘蛛侠]](https://h5.sinaimg.cn/m/emoticon/icon/movies/avengers_spiderman-484e4d2aa5.png)
![[洛基]](https://h5.sinaimg.cn/m/emoticon/icon/movies/avengers_loki-6d80a3c45c.png)
![[奇异博士]](https://h5.sinaimg.cn/m/emoticon/icon/movies/avengers_doctorstrange-c5e7a51127.png)
![[冬兵]](https://h5.sinaimg.cn/m/emoticon/icon/movies/avengers_wintersoldier-a229bcf2be.png)
![[黑豹]](https://h5.sinaimg.cn/m/emoticon/icon/movies/avengers_pngblackpanther-438588a5ce.png)
![[猩红女巫]](https://h5.sinaimg.cn/m/emoticon/icon/movies/avengers_wandadjangomaximoff-233001662f.png)
![[幻视]](https://h5.sinaimg.cn/m/emoticon/icon/movies/avengers_vision-2a23567e24.png)
![[星爵]](https://h5.sinaimg.cn/m/emoticon/icon/movies/avengers_starlord-c4f0730b49.png)
![[格鲁特]](https://h5.sinaimg.cn/m/emoticon/icon/movies/avengers_groot-0714625641.png)
![[螳螂妹]](https://h5.sinaimg.cn/m/emoticon/icon/movies/avengers_mantis-e007e7181f.png)
",
--------------------------------------------------------------------------------
/src/files/unicode.json:
--------------------------------------------------------------------------------
1 | {
2 | "100": "💯",
3 | "1234": "🔢",
4 | "grinning": "😀",
5 | "smiley": "😃",
6 | "smile": "😄",
7 | "grin": "😁",
8 | "laughing": "😆",
9 | "satisfied": "😆",
10 | "sweat_smile": "😅",
11 | "joy": "😂",
12 | "rofl": "🤣",
13 | "relaxed": "☺️",
14 | "blush": "😊",
15 | "innocent": "😇",
16 | "slightly_smiling_face": "🙂",
17 | "upside_down_face": "🙃",
18 | "wink": "😉",
19 | "relieved": "😌",
20 | "heart_eyes": "😍",
21 | "kissing_heart": "😘",
22 | "kissing": "😗",
23 | "kissing_smiling_eyes": "😙",
24 | "kissing_closed_eyes": "😚",
25 | "yum": "😋",
26 | "stuck_out_tongue_winking_eye": "😜",
27 | "stuck_out_tongue_closed_eyes": "😝",
28 | "stuck_out_tongue": "😛",
29 | "money_mouth_face": "🤑",
30 | "hugs": "🤗",
31 | "nerd_face": "🤓",
32 | "sunglasses": "😎",
33 | "clown_face": "🤡",
34 | "cowboy_hat_face": "🤠",
35 | "smirk": "😏",
36 | "unamused": "😒",
37 | "disappointed": "😞",
38 | "pensive": "😔",
39 | "worried": "😟",
40 | "confused": "😕",
41 | "slightly_frowning_face": "🙁",
42 | "frowning_face": "☹️",
43 | "persevere": "😣",
44 | "confounded": "😖",
45 | "tired_face": "😫",
46 | "weary": "😩",
47 | "triumph": "😤",
48 | "angry": "😠",
49 | "rage": "😡",
50 | "pout": "😡",
51 | "no_mouth": "😶",
52 | "neutral_face": "😐",
53 | "expressionless": "😑",
54 | "hushed": "😯",
55 | "frowning": "😦",
56 | "anguished": "😧",
57 | "open_mouth": "😮",
58 | "astonished": "😲",
59 | "dizzy_face": "😵",
60 | "flushed": "😳",
61 | "scream": "😱",
62 | "fearful": "😨",
63 | "cold_sweat": "😰",
64 | "cry": "😢",
65 | "disappointed_relieved": "😥",
66 | "drooling_face": "🤤",
67 | "sob": "😭",
68 | "sweat": "😓",
69 | "sleepy": "😪",
70 | "sleeping": "😴",
71 | "roll_eyes": "🙄",
72 | "thinking": "🤔",
73 | "lying_face": "🤥",
74 | "grimacing": "😬",
75 | "zipper_mouth_face": "🤐",
76 | "nauseated_face": "🤢",
77 | "sneezing_face": "🤧",
78 | "mask": "😷",
79 | "face_with_thermometer": "🤒",
80 | "face_with_head_bandage": "🤕",
81 | "smiling_imp": "😈",
82 | "imp": "👿",
83 | "japanese_ogre": "👹",
84 | "japanese_goblin": "👺",
85 | "hankey": "💩",
86 | "poop": "💩",
87 | "shit": "💩",
88 | "ghost": "👻",
89 | "skull": "💀",
90 | "skull_and_crossbones": "☠️",
91 | "alien": "👽",
92 | "space_invader": "👾",
93 | "robot": "🤖",
94 | "jack_o_lantern": "🎃",
95 | "smiley_cat": "😺",
96 | "smile_cat": "😸",
97 | "joy_cat": "😹",
98 | "heart_eyes_cat": "😻",
99 | "smirk_cat": "😼",
100 | "kissing_cat": "😽",
101 | "scream_cat": "🙀",
102 | "crying_cat_face": "😿",
103 | "pouting_cat": "😾",
104 | "open_hands": "👐",
105 | "raised_hands": "🙌",
106 | "clap": "👏",
107 | "pray": "🙏",
108 | "handshake": "🤝",
109 | "+1": "👍",
110 | "thumbsup": "👍",
111 | "-1": "👎",
112 | "thumbsdown": "👎",
113 | "fist_oncoming": "👊",
114 | "facepunch": "👊",
115 | "punch": "👊",
116 | "fist_raised": "✊",
117 | "fist": "✊",
118 | "fist_left": "🤛",
119 | "fist_right": "🤜",
120 | "crossed_fingers": "🤞",
121 | "v": "✌️",
122 | "metal": "🤘",
123 | "ok_hand": "👌",
124 | "point_left": "👈",
125 | "point_right": "👉",
126 | "point_up_2": "👆",
127 | "point_down": "👇",
128 | "point_up": "☝️",
129 | "hand": "✋",
130 | "raised_hand": "✋",
131 | "raised_back_of_hand": "🤚",
132 | "raised_hand_with_fingers_splayed": "🖐",
133 | "vulcan_salute": "🖖",
134 | "wave": "👋",
135 | "call_me_hand": "🤙",
136 | "muscle": "💪",
137 | "middle_finger": "🖕",
138 | "fu": "🖕",
139 | "writing_hand": "✍️",
140 | "selfie": "🤳",
141 | "nail_care": "💅",
142 | "ring": "💍",
143 | "lipstick": "💄",
144 | "kiss": "💋",
145 | "lips": "👄",
146 | "tongue": "👅",
147 | "ear": "👂",
148 | "nose": "👃",
149 | "footprints": "👣",
150 | "eye": "👁",
151 | "eyes": "👀",
152 | "speaking_head": "🗣",
153 | "bust_in_silhouette": "👤",
154 | "busts_in_silhouette": "👥",
155 | "baby": "👶",
156 | "boy": "👦",
157 | "girl": "👧",
158 | "man": "👨",
159 | "woman": "👩",
160 | "blonde_woman": "👱♀",
161 | "blonde_man": "👱",
162 | "person_with_blond_hair": "👱",
163 | "older_man": "👴",
164 | "older_woman": "👵",
165 | "man_with_gua_pi_mao": "👲",
166 | "woman_with_turban": "👳♀",
167 | "man_with_turban": "👳",
168 | "policewoman": "👮♀",
169 | "policeman": "👮",
170 | "cop": "👮",
171 | "construction_worker_woman": "👷♀",
172 | "construction_worker_man": "👷",
173 | "construction_worker": "👷",
174 | "guardswoman": "💂♀",
175 | "guardsman": "💂",
176 | "female_detective": "🕵️♀️",
177 | "male_detective": "🕵",
178 | "detective": "🕵",
179 | "woman_health_worker": "👩⚕",
180 | "man_health_worker": "👨⚕",
181 | "woman_farmer": "👩🌾",
182 | "man_farmer": "👨🌾",
183 | "woman_cook": "👩🍳",
184 | "man_cook": "👨🍳",
185 | "woman_student": "👩🎓",
186 | "man_student": "👨🎓",
187 | "woman_singer": "👩🎤",
188 | "man_singer": "👨🎤",
189 | "woman_teacher": "👩🏫",
190 | "man_teacher": "👨🏫",
191 | "woman_factory_worker": "👩🏭",
192 | "man_factory_worker": "👨🏭",
193 | "woman_technologist": "👩💻",
194 | "man_technologist": "👨💻",
195 | "woman_office_worker": "👩💼",
196 | "man_office_worker": "👨💼",
197 | "woman_mechanic": "👩🔧",
198 | "man_mechanic": "👨🔧",
199 | "woman_scientist": "👩🔬",
200 | "man_scientist": "👨🔬",
201 | "woman_artist": "👩🎨",
202 | "man_artist": "👨🎨",
203 | "woman_firefighter": "👩🚒",
204 | "man_firefighter": "👨🚒",
205 | "woman_pilot": "👩✈",
206 | "man_pilot": "👨✈",
207 | "woman_astronaut": "👩🚀",
208 | "man_astronaut": "👨🚀",
209 | "woman_judge": "👩⚖",
210 | "man_judge": "👨⚖",
211 | "mrs_claus": "🤶",
212 | "santa": "🎅",
213 | "princess": "👸",
214 | "prince": "🤴",
215 | "bride_with_veil": "👰",
216 | "man_in_tuxedo": "🤵",
217 | "angel": "👼",
218 | "pregnant_woman": "🤰",
219 | "bowing_woman": "🙇♀",
220 | "bowing_man": "🙇",
221 | "bow": "🙇",
222 | "tipping_hand_woman": "💁",
223 | "information_desk_person": "💁",
224 | "sassy_woman": "💁",
225 | "tipping_hand_man": "💁♂",
226 | "sassy_man": "💁♂",
227 | "no_good_woman": "🙅",
228 | "no_good": "🙅",
229 | "ng_woman": "🙅",
230 | "no_good_man": "🙅♂",
231 | "ng_man": "🙅♂",
232 | "ok_woman": "🙆",
233 | "ok_man": "🙆♂",
234 | "raising_hand_woman": "🙋",
235 | "raising_hand": "🙋",
236 | "raising_hand_man": "🙋♂",
237 | "woman_facepalming": "🤦♀",
238 | "man_facepalming": "🤦♂",
239 | "woman_shrugging": "🤷♀",
240 | "man_shrugging": "🤷♂",
241 | "pouting_woman": "🙎",
242 | "person_with_pouting_face": "🙎",
243 | "pouting_man": "🙎♂",
244 | "frowning_woman": "🙍",
245 | "person_frowning": "🙍",
246 | "frowning_man": "🙍♂",
247 | "haircut_woman": "💇",
248 | "haircut": "💇",
249 | "haircut_man": "💇♂",
250 | "massage_woman": "💆",
251 | "massage": "💆",
252 | "massage_man": "💆♂",
253 | "business_suit_levitating": "🕴",
254 | "dancer": "💃",
255 | "man_dancing": "🕺",
256 | "dancing_women": "👯",
257 | "dancers": "👯",
258 | "dancing_men": "👯♂",
259 | "walking_woman": "🚶♀",
260 | "walking_man": "🚶",
261 | "walking": "🚶",
262 | "running_woman": "🏃♀",
263 | "running_man": "🏃",
264 | "runner": "🏃",
265 | "running": "🏃",
266 | "couple": "👫",
267 | "two_women_holding_hands": "👭",
268 | "two_men_holding_hands": "👬",
269 | "couple_with_heart_woman_man": "💑",
270 | "couple_with_heart": "💑",
271 | "couple_with_heart_woman_woman": "👩❤️👩",
272 | "couple_with_heart_man_man": "👨❤️👨",
273 | "couplekiss_man_woman": "💏",
274 | "couplekiss_woman_woman": "👩❤️💋👩",
275 | "couplekiss_man_man": "👨❤️💋👨",
276 | "family_man_woman_boy": "👪",
277 | "family": "👪",
278 | "family_man_woman_girl": "👨👩👧",
279 | "family_man_woman_girl_boy": "👨👩👧👦",
280 | "family_man_woman_boy_boy": "👨👩👦👦",
281 | "family_man_woman_girl_girl": "👨👩👧👧",
282 | "family_woman_woman_boy": "👩👩👦",
283 | "family_woman_woman_girl": "👩👩👧",
284 | "family_woman_woman_girl_boy": "👩👩👧👦",
285 | "family_woman_woman_boy_boy": "👩👩👦👦",
286 | "family_woman_woman_girl_girl": "👩👩👧👧",
287 | "family_man_man_boy": "👨👨👦",
288 | "family_man_man_girl": "👨👨👧",
289 | "family_man_man_girl_boy": "👨👨👧👦",
290 | "family_man_man_boy_boy": "👨👨👦👦",
291 | "family_man_man_girl_girl": "👨👨👧👧",
292 | "family_woman_boy": "👩👦",
293 | "family_woman_girl": "👩👧",
294 | "family_woman_girl_boy": "👩👧👦",
295 | "family_woman_boy_boy": "👩👦👦",
296 | "family_woman_girl_girl": "👩👧👧",
297 | "family_man_boy": "👨👦",
298 | "family_man_girl": "👨👧",
299 | "family_man_girl_boy": "👨👧👦",
300 | "family_man_boy_boy": "👨👦👦",
301 | "family_man_girl_girl": "👨👧👧",
302 | "womans_clothes": "👚",
303 | "shirt": "👕",
304 | "tshirt": "👕",
305 | "jeans": "👖",
306 | "necktie": "👔",
307 | "dress": "👗",
308 | "bikini": "👙",
309 | "kimono": "👘",
310 | "high_heel": "👠",
311 | "sandal": "👡",
312 | "boot": "👢",
313 | "mans_shoe": "👞",
314 | "shoe": "👞",
315 | "athletic_shoe": "👟",
316 | "womans_hat": "👒",
317 | "tophat": "🎩",
318 | "mortar_board": "🎓",
319 | "crown": "👑",
320 | "rescue_worker_helmet": "⛑",
321 | "school_satchel": "🎒",
322 | "pouch": "👝",
323 | "purse": "👛",
324 | "handbag": "👜",
325 | "briefcase": "💼",
326 | "eyeglasses": "👓",
327 | "dark_sunglasses": "🕶",
328 | "closed_umbrella": "🌂",
329 | "open_umbrella": "☂️",
330 | "dog": "🐶",
331 | "cat": "🐱",
332 | "mouse": "🐭",
333 | "hamster": "🐹",
334 | "rabbit": "🐰",
335 | "fox_face": "🦊",
336 | "bear": "🐻",
337 | "panda_face": "🐼",
338 | "koala": "🐨",
339 | "tiger": "🐯",
340 | "lion": "🦁",
341 | "cow": "🐮",
342 | "pig": "🐷",
343 | "pig_nose": "🐽",
344 | "frog": "🐸",
345 | "monkey_face": "🐵",
346 | "see_no_evil": "🙈",
347 | "hear_no_evil": "🙉",
348 | "speak_no_evil": "🙊",
349 | "monkey": "🐒",
350 | "chicken": "🐔",
351 | "penguin": "🐧",
352 | "bird": "🐦",
353 | "baby_chick": "🐤",
354 | "hatching_chick": "🐣",
355 | "hatched_chick": "🐥",
356 | "duck": "🦆",
357 | "eagle": "🦅",
358 | "owl": "🦉",
359 | "bat": "🦇",
360 | "wolf": "🐺",
361 | "boar": "🐗",
362 | "horse": "🐴",
363 | "unicorn": "🦄",
364 | "bee": "🐝",
365 | "honeybee": "🐝",
366 | "bug": "🐛",
367 | "butterfly": "🦋",
368 | "snail": "🐌",
369 | "shell": "🐚",
370 | "beetle": "🐞",
371 | "ant": "🐜",
372 | "spider": "🕷",
373 | "spider_web": "🕸",
374 | "turtle": "🐢",
375 | "snake": "🐍",
376 | "lizard": "🦎",
377 | "scorpion": "🦂",
378 | "crab": "🦀",
379 | "squid": "🦑",
380 | "octopus": "🐙",
381 | "shrimp": "🦐",
382 | "tropical_fish": "🐠",
383 | "fish": "🐟",
384 | "blowfish": "🐡",
385 | "dolphin": "🐬",
386 | "flipper": "🐬",
387 | "shark": "🦈",
388 | "whale": "🐳",
389 | "whale2": "🐋",
390 | "crocodile": "🐊",
391 | "leopard": "🐆",
392 | "tiger2": "🐅",
393 | "water_buffalo": "🐃",
394 | "ox": "🐂",
395 | "cow2": "🐄",
396 | "deer": "🦌",
397 | "dromedary_camel": "🐪",
398 | "camel": "🐫",
399 | "elephant": "🐘",
400 | "rhinoceros": "🦏",
401 | "gorilla": "🦍",
402 | "racehorse": "🐎",
403 | "pig2": "🐖",
404 | "goat": "🐐",
405 | "ram": "🐏",
406 | "sheep": "🐑",
407 | "dog2": "🐕",
408 | "poodle": "🐩",
409 | "cat2": "🐈",
410 | "rooster": "🐓",
411 | "turkey": "🦃",
412 | "dove": "🕊",
413 | "rabbit2": "🐇",
414 | "mouse2": "🐁",
415 | "rat": "🐀",
416 | "chipmunk": "🐿",
417 | "feet": "🐾",
418 | "paw_prints": "🐾",
419 | "dragon": "🐉",
420 | "dragon_face": "🐲",
421 | "cactus": "🌵",
422 | "christmas_tree": "🎄",
423 | "evergreen_tree": "🌲",
424 | "deciduous_tree": "🌳",
425 | "palm_tree": "🌴",
426 | "seedling": "🌱",
427 | "herb": "🌿",
428 | "shamrock": "☘️",
429 | "four_leaf_clover": "🍀",
430 | "bamboo": "🎍",
431 | "tanabata_tree": "🎋",
432 | "leaves": "🍃",
433 | "fallen_leaf": "🍂",
434 | "maple_leaf": "🍁",
435 | "mushroom": "🍄",
436 | "ear_of_rice": "🌾",
437 | "bouquet": "💐",
438 | "tulip": "🌷",
439 | "rose": "🌹",
440 | "wilted_flower": "🥀",
441 | "sunflower": "🌻",
442 | "blossom": "🌼",
443 | "cherry_blossom": "🌸",
444 | "hibiscus": "🌺",
445 | "earth_americas": "🌎",
446 | "earth_africa": "🌍",
447 | "earth_asia": "🌏",
448 | "full_moon": "🌕",
449 | "waning_gibbous_moon": "🌖",
450 | "last_quarter_moon": "🌗",
451 | "waning_crescent_moon": "🌘",
452 | "new_moon": "🌑",
453 | "waxing_crescent_moon": "🌒",
454 | "first_quarter_moon": "🌓",
455 | "moon": "🌔",
456 | "waxing_gibbous_moon": "🌔",
457 | "new_moon_with_face": "🌚",
458 | "full_moon_with_face": "🌝",
459 | "sun_with_face": "🌞",
460 | "first_quarter_moon_with_face": "🌛",
461 | "last_quarter_moon_with_face": "🌜",
462 | "crescent_moon": "🌙",
463 | "dizzy": "💫",
464 | "star": "⭐️",
465 | "star2": "🌟",
466 | "sparkles": "✨",
467 | "zap": "⚡️",
468 | "fire": "🔥",
469 | "boom": "💥",
470 | "collision": "💥",
471 | "comet": "☄",
472 | "sunny": "☀️",
473 | "sun_behind_small_cloud": "🌤",
474 | "partly_sunny": "⛅️",
475 | "sun_behind_large_cloud": "🌥",
476 | "sun_behind_rain_cloud": "🌦",
477 | "rainbow": "🌈",
478 | "cloud": "☁️",
479 | "cloud_with_rain": "🌧",
480 | "cloud_with_lightning_and_rain": "⛈",
481 | "cloud_with_lightning": "🌩",
482 | "cloud_with_snow": "🌨",
483 | "snowman_with_snow": "☃️",
484 | "snowman": "⛄️",
485 | "snowflake": "❄️",
486 | "wind_face": "🌬",
487 | "dash": "💨",
488 | "tornado": "🌪",
489 | "fog": "🌫",
490 | "ocean": "🌊",
491 | "droplet": "💧",
492 | "sweat_drops": "💦",
493 | "umbrella": "☔️",
494 | "green_apple": "🍏",
495 | "apple": "🍎",
496 | "pear": "🍐",
497 | "tangerine": "🍊",
498 | "orange": "🍊",
499 | "mandarin": "🍊",
500 | "lemon": "🍋",
501 | "banana": "🍌",
502 | "watermelon": "🍉",
503 | "grapes": "🍇",
504 | "strawberry": "🍓",
505 | "melon": "🍈",
506 | "cherries": "🍒",
507 | "peach": "🍑",
508 | "pineapple": "🍍",
509 | "kiwi_fruit": "🥝",
510 | "avocado": "🥑",
511 | "tomato": "🍅",
512 | "eggplant": "🍆",
513 | "cucumber": "🥒",
514 | "carrot": "🥕",
515 | "corn": "🌽",
516 | "hot_pepper": "🌶",
517 | "potato": "🥔",
518 | "sweet_potato": "🍠",
519 | "chestnut": "🌰",
520 | "peanuts": "🥜",
521 | "honey_pot": "🍯",
522 | "croissant": "🥐",
523 | "bread": "🍞",
524 | "baguette_bread": "🥖",
525 | "cheese": "🧀",
526 | "egg": "🥚",
527 | "fried_egg": "🍳",
528 | "bacon": "🥓",
529 | "pancakes": "🥞",
530 | "fried_shrimp": "🍤",
531 | "poultry_leg": "🍗",
532 | "meat_on_bone": "🍖",
533 | "pizza": "🍕",
534 | "hotdog": "🌭",
535 | "hamburger": "🍔",
536 | "fries": "🍟",
537 | "stuffed_flatbread": "🥙",
538 | "taco": "🌮",
539 | "burrito": "🌯",
540 | "green_salad": "🥗",
541 | "shallow_pan_of_food": "🥘",
542 | "spaghetti": "🍝",
543 | "ramen": "🍜",
544 | "stew": "🍲",
545 | "fish_cake": "🍥",
546 | "sushi": "🍣",
547 | "bento": "🍱",
548 | "curry": "🍛",
549 | "rice": "🍚",
550 | "rice_ball": "🍙",
551 | "rice_cracker": "🍘",
552 | "oden": "🍢",
553 | "dango": "🍡",
554 | "shaved_ice": "🍧",
555 | "ice_cream": "🍨",
556 | "icecream": "🍦",
557 | "cake": "🍰",
558 | "birthday": "🎂",
559 | "custard": "🍮",
560 | "lollipop": "🍭",
561 | "candy": "🍬",
562 | "chocolate_bar": "🍫",
563 | "popcorn": "🍿",
564 | "doughnut": "🍩",
565 | "cookie": "🍪",
566 | "milk_glass": "🥛",
567 | "baby_bottle": "🍼",
568 | "coffee": "☕️",
569 | "tea": "🍵",
570 | "sake": "🍶",
571 | "beer": "🍺",
572 | "beers": "🍻",
573 | "clinking_glasses": "🥂",
574 | "wine_glass": "🍷",
575 | "tumbler_glass": "🥃",
576 | "cocktail": "🍸",
577 | "tropical_drink": "🍹",
578 | "champagne": "🍾",
579 | "spoon": "🥄",
580 | "fork_and_knife": "🍴",
581 | "plate_with_cutlery": "🍽",
582 | "soccer": "⚽️",
583 | "basketball": "🏀",
584 | "football": "🏈",
585 | "baseball": "⚾️",
586 | "tennis": "🎾",
587 | "volleyball": "🏐",
588 | "rugby_football": "🏉",
589 | "8ball": "🎱",
590 | "ping_pong": "🏓",
591 | "badminton": "🏸",
592 | "goal_net": "🥅",
593 | "ice_hockey": "🏒",
594 | "field_hockey": "🏑",
595 | "cricket": "🏏",
596 | "golf": "⛳️",
597 | "bow_and_arrow": "🏹",
598 | "fishing_pole_and_fish": "🎣",
599 | "boxing_glove": "🥊",
600 | "martial_arts_uniform": "🥋",
601 | "ice_skate": "⛸",
602 | "ski": "🎿",
603 | "skier": "⛷",
604 | "snowboarder": "🏂",
605 | "weight_lifting_woman": "🏋️♀️",
606 | "weight_lifting_man": "🏋",
607 | "person_fencing": "🤺",
608 | "women_wrestling": "🤼♀",
609 | "men_wrestling": "🤼♂",
610 | "woman_cartwheeling": "🤸♀",
611 | "man_cartwheeling": "🤸♂",
612 | "basketball_woman": "⛹️♀️",
613 | "basketball_man": "⛹",
614 | "woman_playing_handball": "🤾♀",
615 | "man_playing_handball": "🤾♂",
616 | "golfing_woman": "🏌️♀️",
617 | "golfing_man": "🏌",
618 | "surfing_woman": "🏄♀",
619 | "surfing_man": "🏄",
620 | "surfer": "🏄",
621 | "swimming_woman": "🏊♀",
622 | "swimming_man": "🏊",
623 | "swimmer": "🏊",
624 | "woman_playing_water_polo": "🤽♀",
625 | "man_playing_water_polo": "🤽♂",
626 | "rowing_woman": "🚣♀",
627 | "rowing_man": "🚣",
628 | "rowboat": "🚣",
629 | "horse_racing": "🏇",
630 | "biking_woman": "🚴♀",
631 | "biking_man": "🚴",
632 | "bicyclist": "🚴",
633 | "mountain_biking_woman": "🚵♀",
634 | "mountain_biking_man": "🚵",
635 | "mountain_bicyclist": "🚵",
636 | "running_shirt_with_sash": "🎽",
637 | "medal_sports": "🏅",
638 | "medal_military": "🎖",
639 | "1st_place_medal": "🥇",
640 | "2nd_place_medal": "🥈",
641 | "3rd_place_medal": "🥉",
642 | "trophy": "🏆",
643 | "rosette": "🏵",
644 | "reminder_ribbon": "🎗",
645 | "ticket": "🎫",
646 | "tickets": "🎟",
647 | "circus_tent": "🎪",
648 | "woman_juggling": "🤹♀",
649 | "man_juggling": "🤹♂",
650 | "performing_arts": "🎭",
651 | "art": "🎨",
652 | "clapper": "🎬",
653 | "microphone": "🎤",
654 | "headphones": "🎧",
655 | "musical_score": "🎼",
656 | "musical_keyboard": "🎹",
657 | "drum": "🥁",
658 | "saxophone": "🎷",
659 | "trumpet": "🎺",
660 | "guitar": "🎸",
661 | "violin": "🎻",
662 | "game_die": "🎲",
663 | "dart": "🎯",
664 | "bowling": "🎳",
665 | "video_game": "🎮",
666 | "slot_machine": "🎰",
667 | "car": "🚗",
668 | "red_car": "🚗",
669 | "taxi": "🚕",
670 | "blue_car": "🚙",
671 | "bus": "🚌",
672 | "trolleybus": "🚎",
673 | "racing_car": "🏎",
674 | "police_car": "🚓",
675 | "ambulance": "🚑",
676 | "fire_engine": "🚒",
677 | "minibus": "🚐",
678 | "truck": "🚚",
679 | "articulated_lorry": "🚛",
680 | "tractor": "🚜",
681 | "kick_scooter": "🛴",
682 | "bike": "🚲",
683 | "motor_scooter": "🛵",
684 | "motorcycle": "🏍",
685 | "rotating_light": "🚨",
686 | "oncoming_police_car": "🚔",
687 | "oncoming_bus": "🚍",
688 | "oncoming_automobile": "🚘",
689 | "oncoming_taxi": "🚖",
690 | "aerial_tramway": "🚡",
691 | "mountain_cableway": "🚠",
692 | "suspension_railway": "🚟",
693 | "railway_car": "🚃",
694 | "train": "🚋",
695 | "mountain_railway": "🚞",
696 | "monorail": "🚝",
697 | "bullettrain_side": "🚄",
698 | "bullettrain_front": "🚅",
699 | "light_rail": "🚈",
700 | "steam_locomotive": "🚂",
701 | "train2": "🚆",
702 | "metro": "🚇",
703 | "tram": "🚊",
704 | "station": "🚉",
705 | "helicopter": "🚁",
706 | "small_airplane": "🛩",
707 | "airplane": "✈️",
708 | "flight_departure": "🛫",
709 | "flight_arrival": "🛬",
710 | "rocket": "🚀",
711 | "artificial_satellite": "🛰",
712 | "seat": "💺",
713 | "canoe": "🛶",
714 | "boat": "⛵️",
715 | "sailboat": "⛵️",
716 | "motor_boat": "🛥",
717 | "speedboat": "🚤",
718 | "passenger_ship": "🛳",
719 | "ferry": "⛴",
720 | "ship": "🚢",
721 | "anchor": "⚓️",
722 | "construction": "🚧",
723 | "fuelpump": "⛽️",
724 | "busstop": "🚏",
725 | "vertical_traffic_light": "🚦",
726 | "traffic_light": "🚥",
727 | "world_map": "🗺",
728 | "moyai": "🗿",
729 | "statue_of_liberty": "🗽",
730 | "fountain": "⛲️",
731 | "tokyo_tower": "🗼",
732 | "european_castle": "🏰",
733 | "japanese_castle": "🏯",
734 | "stadium": "🏟",
735 | "ferris_wheel": "🎡",
736 | "roller_coaster": "🎢",
737 | "carousel_horse": "🎠",
738 | "parasol_on_ground": "⛱",
739 | "beach_umbrella": "🏖",
740 | "desert_island": "🏝",
741 | "mountain": "⛰",
742 | "mountain_snow": "🏔",
743 | "mount_fuji": "🗻",
744 | "volcano": "🌋",
745 | "desert": "🏜",
746 | "camping": "🏕",
747 | "tent": "⛺️",
748 | "railway_track": "🛤",
749 | "motorway": "🛣",
750 | "building_construction": "🏗",
751 | "factory": "🏭",
752 | "house": "🏠",
753 | "house_with_garden": "🏡",
754 | "houses": "🏘",
755 | "derelict_house": "🏚",
756 | "office": "🏢",
757 | "department_store": "🏬",
758 | "post_office": "🏣",
759 | "european_post_office": "🏤",
760 | "hospital": "🏥",
761 | "bank": "🏦",
762 | "hotel": "🏨",
763 | "convenience_store": "🏪",
764 | "school": "🏫",
765 | "love_hotel": "🏩",
766 | "wedding": "💒",
767 | "classical_building": "🏛",
768 | "church": "⛪️",
769 | "mosque": "🕌",
770 | "synagogue": "🕍",
771 | "kaaba": "🕋",
772 | "shinto_shrine": "⛩",
773 | "japan": "🗾",
774 | "rice_scene": "🎑",
775 | "national_park": "🏞",
776 | "sunrise": "🌅",
777 | "sunrise_over_mountains": "🌄",
778 | "stars": "🌠",
779 | "sparkler": "🎇",
780 | "fireworks": "🎆",
781 | "city_sunrise": "🌇",
782 | "city_sunset": "🌆",
783 | "cityscape": "🏙",
784 | "night_with_stars": "🌃",
785 | "milky_way": "🌌",
786 | "bridge_at_night": "🌉",
787 | "foggy": "🌁",
788 | "watch": "⌚️",
789 | "iphone": "📱",
790 | "calling": "📲",
791 | "computer": "💻",
792 | "keyboard": "⌨️",
793 | "desktop_computer": "🖥",
794 | "printer": "🖨",
795 | "computer_mouse": "🖱",
796 | "trackball": "🖲",
797 | "joystick": "🕹",
798 | "clamp": "🗜",
799 | "minidisc": "💽",
800 | "floppy_disk": "💾",
801 | "cd": "💿",
802 | "dvd": "📀",
803 | "vhs": "📼",
804 | "camera": "📷",
805 | "camera_flash": "📸",
806 | "video_camera": "📹",
807 | "movie_camera": "🎥",
808 | "film_projector": "📽",
809 | "film_strip": "🎞",
810 | "telephone_receiver": "📞",
811 | "phone": "☎️",
812 | "telephone": "☎️",
813 | "pager": "📟",
814 | "fax": "📠",
815 | "tv": "📺",
816 | "radio": "📻",
817 | "studio_microphone": "🎙",
818 | "level_slider": "🎚",
819 | "control_knobs": "🎛",
820 | "stopwatch": "⏱",
821 | "timer_clock": "⏲",
822 | "alarm_clock": "⏰",
823 | "mantelpiece_clock": "🕰",
824 | "hourglass": "⌛️",
825 | "hourglass_flowing_sand": "⏳",
826 | "satellite": "📡",
827 | "battery": "🔋",
828 | "electric_plug": "🔌",
829 | "bulb": "💡",
830 | "flashlight": "🔦",
831 | "candle": "🕯",
832 | "wastebasket": "🗑",
833 | "oil_drum": "🛢",
834 | "money_with_wings": "💸",
835 | "dollar": "💵",
836 | "yen": "💴",
837 | "euro": "💶",
838 | "pound": "💷",
839 | "moneybag": "💰",
840 | "credit_card": "💳",
841 | "gem": "💎",
842 | "balance_scale": "⚖️",
843 | "wrench": "🔧",
844 | "hammer": "🔨",
845 | "hammer_and_pick": "⚒",
846 | "hammer_and_wrench": "🛠",
847 | "pick": "⛏",
848 | "nut_and_bolt": "🔩",
849 | "gear": "⚙️",
850 | "chains": "⛓",
851 | "gun": "🔫",
852 | "bomb": "💣",
853 | "hocho": "🔪",
854 | "knife": "🔪",
855 | "dagger": "🗡",
856 | "crossed_swords": "⚔️",
857 | "shield": "🛡",
858 | "smoking": "🚬",
859 | "coffin": "⚰️",
860 | "funeral_urn": "⚱️",
861 | "amphora": "🏺",
862 | "crystal_ball": "🔮",
863 | "prayer_beads": "📿",
864 | "barber": "💈",
865 | "alembic": "⚗️",
866 | "telescope": "🔭",
867 | "microscope": "🔬",
868 | "hole": "🕳",
869 | "pill": "💊",
870 | "syringe": "💉",
871 | "thermometer": "🌡",
872 | "toilet": "🚽",
873 | "potable_water": "🚰",
874 | "shower": "🚿",
875 | "bathtub": "🛁",
876 | "bath": "🛀",
877 | "bellhop_bell": "🛎",
878 | "key": "🔑",
879 | "old_key": "🗝",
880 | "door": "🚪",
881 | "couch_and_lamp": "🛋",
882 | "bed": "🛏",
883 | "sleeping_bed": "🛌",
884 | "framed_picture": "🖼",
885 | "shopping": "🛍",
886 | "shopping_cart": "🛒",
887 | "gift": "🎁",
888 | "balloon": "🎈",
889 | "flags": "🎏",
890 | "ribbon": "🎀",
891 | "confetti_ball": "🎊",
892 | "tada": "🎉",
893 | "dolls": "🎎",
894 | "izakaya_lantern": "🏮",
895 | "lantern": "🏮",
896 | "wind_chime": "🎐",
897 | "email": "✉️",
898 | "envelope": "✉️",
899 | "envelope_with_arrow": "📩",
900 | "incoming_envelope": "📨",
901 | "e-mail": "📧",
902 | "love_letter": "💌",
903 | "inbox_tray": "📥",
904 | "outbox_tray": "📤",
905 | "package": "📦",
906 | "label": "🏷",
907 | "mailbox_closed": "📪",
908 | "mailbox": "📫",
909 | "mailbox_with_mail": "📬",
910 | "mailbox_with_no_mail": "📭",
911 | "postbox": "📮",
912 | "postal_horn": "📯",
913 | "scroll": "📜",
914 | "page_with_curl": "📃",
915 | "page_facing_up": "📄",
916 | "bookmark_tabs": "📑",
917 | "bar_chart": "📊",
918 | "chart_with_upwards_trend": "📈",
919 | "chart_with_downwards_trend": "📉",
920 | "spiral_notepad": "🗒",
921 | "spiral_calendar": "🗓",
922 | "calendar": "📆",
923 | "date": "📅",
924 | "card_index": "📇",
925 | "card_file_box": "🗃",
926 | "ballot_box": "🗳",
927 | "file_cabinet": "🗄",
928 | "clipboard": "📋",
929 | "file_folder": "📁",
930 | "open_file_folder": "📂",
931 | "card_index_dividers": "🗂",
932 | "newspaper_roll": "🗞",
933 | "newspaper": "📰",
934 | "notebook": "📓",
935 | "notebook_with_decorative_cover": "📔",
936 | "ledger": "📒",
937 | "closed_book": "📕",
938 | "green_book": "📗",
939 | "blue_book": "📘",
940 | "orange_book": "📙",
941 | "books": "📚",
942 | "book": "📖",
943 | "open_book": "📖",
944 | "bookmark": "🔖",
945 | "link": "🔗",
946 | "paperclip": "📎",
947 | "paperclips": "🖇",
948 | "triangular_ruler": "📐",
949 | "straight_ruler": "📏",
950 | "pushpin": "📌",
951 | "round_pushpin": "📍",
952 | "scissors": "✂️",
953 | "pen": "🖊",
954 | "fountain_pen": "🖋",
955 | "black_nib": "✒️",
956 | "paintbrush": "🖌",
957 | "crayon": "🖍",
958 | "memo": "📝",
959 | "pencil": "📝",
960 | "pencil2": "✏️",
961 | "mag": "🔍",
962 | "mag_right": "🔎",
963 | "lock_with_ink_pen": "🔏",
964 | "closed_lock_with_key": "🔐",
965 | "lock": "🔒",
966 | "unlock": "🔓",
967 | "heart": "❤️",
968 | "yellow_heart": "💛",
969 | "green_heart": "💚",
970 | "blue_heart": "💙",
971 | "purple_heart": "💜",
972 | "black_heart": "🖤",
973 | "broken_heart": "💔",
974 | "heavy_heart_exclamation": "❣️",
975 | "two_hearts": "💕",
976 | "revolving_hearts": "💞",
977 | "heartbeat": "💓",
978 | "heartpulse": "💗",
979 | "sparkling_heart": "💖",
980 | "cupid": "💘",
981 | "gift_heart": "💝",
982 | "heart_decoration": "💟",
983 | "peace_symbol": "☮️",
984 | "latin_cross": "✝️",
985 | "star_and_crescent": "☪️",
986 | "om": "🕉",
987 | "wheel_of_dharma": "☸️",
988 | "star_of_david": "✡️",
989 | "six_pointed_star": "🔯",
990 | "menorah": "🕎",
991 | "yin_yang": "☯️",
992 | "orthodox_cross": "☦️",
993 | "place_of_worship": "🛐",
994 | "ophiuchus": "⛎",
995 | "aries": "♈️",
996 | "taurus": "♉️",
997 | "gemini": "♊️",
998 | "cancer": "♋️",
999 | "leo": "♌️",
1000 | "virgo": "♍️",
1001 | "libra": "♎️",
1002 | "scorpius": "♏️",
1003 | "sagittarius": "♐️",
1004 | "capricorn": "♑️",
1005 | "aquarius": "♒️",
1006 | "pisces": "♓️",
1007 | "id": "🆔",
1008 | "atom_symbol": "⚛️",
1009 | "accept": "🉑",
1010 | "radioactive": "☢️",
1011 | "biohazard": "☣️",
1012 | "mobile_phone_off": "📴",
1013 | "vibration_mode": "📳",
1014 | "eight_pointed_black_star": "✴️",
1015 | "vs": "🆚",
1016 | "white_flower": "💮",
1017 | "ideograph_advantage": "🉐",
1018 | "secret": "㊙️",
1019 | "congratulations": "㊗️",
1020 | "u6e80": "🈵",
1021 | "a": "🅰️",
1022 | "b": "🅱️",
1023 | "ab": "🆎",
1024 | "cl": "🆑",
1025 | "o2": "🅾️",
1026 | "sos": "🆘",
1027 | "x": "❌",
1028 | "o": "⭕️",
1029 | "stop_sign": "🛑",
1030 | "no_entry": "⛔️",
1031 | "name_badge": "📛",
1032 | "no_entry_sign": "🚫",
1033 | "anger": "💢",
1034 | "hotsprings": "♨️",
1035 | "no_pedestrians": "🚷",
1036 | "do_not_litter": "🚯",
1037 | "no_bicycles": "🚳",
1038 | "non-potable_water": "🚱",
1039 | "underage": "🔞",
1040 | "no_mobile_phones": "📵",
1041 | "no_smoking": "🚭",
1042 | "exclamation": "❗️",
1043 | "heavy_exclamation_mark": "❗️",
1044 | "grey_exclamation": "❕",
1045 | "question": "❓",
1046 | "grey_question": "❔",
1047 | "bangbang": "‼️",
1048 | "interrobang": "⁉️",
1049 | "low_brightness": "🔅",
1050 | "high_brightness": "🔆",
1051 | "part_alternation_mark": "〽️",
1052 | "warning": "⚠️",
1053 | "children_crossing": "🚸",
1054 | "trident": "🔱",
1055 | "fleur_de_lis": "⚜️",
1056 | "beginner": "🔰",
1057 | "recycle": "♻️",
1058 | "white_check_mark": "✅",
1059 | "chart": "💹",
1060 | "sparkle": "❇️",
1061 | "eight_spoked_asterisk": "✳️",
1062 | "negative_squared_cross_mark": "❎",
1063 | "globe_with_meridians": "🌐",
1064 | "diamond_shape_with_a_dot_inside": "💠",
1065 | "m": "Ⓜ️",
1066 | "cyclone": "🌀",
1067 | "zzz": "💤",
1068 | "atm": "🏧",
1069 | "wc": "🚾",
1070 | "wheelchair": "♿️",
1071 | "parking": "🅿️",
1072 | "sa": "🈂️",
1073 | "passport_control": "🛂",
1074 | "customs": "🛃",
1075 | "baggage_claim": "🛄",
1076 | "left_luggage": "🛅",
1077 | "mens": "🚹",
1078 | "womens": "🚺",
1079 | "baby_symbol": "🚼",
1080 | "restroom": "🚻",
1081 | "put_litter_in_its_place": "🚮",
1082 | "cinema": "🎦",
1083 | "signal_strength": "📶",
1084 | "koko": "🈁",
1085 | "symbols": "🔣",
1086 | "information_source": "ℹ️",
1087 | "abc": "🔤",
1088 | "abcd": "🔡",
1089 | "capital_abcd": "🔠",
1090 | "ng": "🆖",
1091 | "ok": "🆗",
1092 | "up": "🆙",
1093 | "cool": "🆒",
1094 | "new": "🆕",
1095 | "free": "🆓",
1096 | "zero": "0️⃣",
1097 | "one": "1️⃣",
1098 | "two": "2️⃣",
1099 | "three": "3️⃣",
1100 | "four": "4️⃣",
1101 | "five": "5️⃣",
1102 | "six": "6️⃣",
1103 | "seven": "7️⃣",
1104 | "eight": "8️⃣",
1105 | "nine": "9️⃣",
1106 | "keycap_ten": "🔟",
1107 | "hash": "#️⃣",
1108 | "asterisk": "*️⃣",
1109 | "arrow_forward": "▶️",
1110 | "pause_button": "⏸",
1111 | "play_or_pause_button": "⏯",
1112 | "stop_button": "⏹",
1113 | "record_button": "⏺",
1114 | "next_track_button": "⏭",
1115 | "previous_track_button": "⏮",
1116 | "fast_forward": "⏩",
1117 | "rewind": "⏪",
1118 | "arrow_double_up": "⏫",
1119 | "arrow_double_down": "⏬",
1120 | "arrow_backward": "◀️",
1121 | "arrow_up_small": "🔼",
1122 | "arrow_down_small": "🔽",
1123 | "arrow_right": "➡️",
1124 | "arrow_left": "⬅️",
1125 | "arrow_up": "⬆️",
1126 | "arrow_down": "⬇️",
1127 | "arrow_upper_right": "↗️",
1128 | "arrow_lower_right": "↘️",
1129 | "arrow_lower_left": "↙️",
1130 | "arrow_upper_left": "↖️",
1131 | "arrow_up_down": "↕️",
1132 | "left_right_arrow": "↔️",
1133 | "arrow_right_hook": "↪️",
1134 | "leftwards_arrow_with_hook": "↩️",
1135 | "arrow_heading_up": "⤴️",
1136 | "arrow_heading_down": "⤵️",
1137 | "twisted_rightwards_arrows": "🔀",
1138 | "repeat": "🔁",
1139 | "repeat_one": "🔂",
1140 | "arrows_counterclockwise": "🔄",
1141 | "arrows_clockwise": "🔃",
1142 | "musical_note": "🎵",
1143 | "notes": "🎶",
1144 | "heavy_plus_sign": "➕",
1145 | "heavy_minus_sign": "➖",
1146 | "heavy_division_sign": "➗",
1147 | "heavy_multiplication_x": "✖️",
1148 | "heavy_dollar_sign": "💲",
1149 | "currency_exchange": "💱",
1150 | "tm": "™️",
1151 | "copyright": "©️",
1152 | "registered": "®️",
1153 | "wavy_dash": "〰️",
1154 | "curly_loop": "➰",
1155 | "loop": "➿",
1156 | "end": "🔚",
1157 | "back": "🔙",
1158 | "on": "🔛",
1159 | "top": "🔝",
1160 | "soon": "🔜",
1161 | "heavy_check_mark": "✔️",
1162 | "ballot_box_with_check": "☑️",
1163 | "radio_button": "🔘",
1164 | "white_circle": "⚪️",
1165 | "black_circle": "⚫️",
1166 | "red_circle": "🔴",
1167 | "large_blue_circle": "🔵",
1168 | "small_red_triangle": "🔺",
1169 | "small_red_triangle_down": "🔻",
1170 | "small_orange_diamond": "🔸",
1171 | "small_blue_diamond": "🔹",
1172 | "large_orange_diamond": "🔶",
1173 | "large_blue_diamond": "🔷",
1174 | "white_square_button": "🔳",
1175 | "black_square_button": "🔲",
1176 | "black_small_square": "▪️",
1177 | "white_small_square": "▫️",
1178 | "black_medium_small_square": "◾️",
1179 | "white_medium_small_square": "◽️",
1180 | "black_medium_square": "◼️",
1181 | "white_medium_square": "◻️",
1182 | "black_large_square": "⬛️",
1183 | "white_large_square": "⬜️",
1184 | "speaker": "🔈",
1185 | "mute": "🔇",
1186 | "sound": "🔉",
1187 | "loud_sound": "🔊",
1188 | "bell": "🔔",
1189 | "no_bell": "🔕",
1190 | "mega": "📣",
1191 | "loudspeaker": "📢",
1192 | "eye_speech_bubble": "👁🗨",
1193 | "speech_balloon": "💬",
1194 | "thought_balloon": "💭",
1195 | "right_anger_bubble": "🗯",
1196 | "spades": "♠️",
1197 | "clubs": "♣️",
1198 | "hearts": "♥️",
1199 | "diamonds": "♦️",
1200 | "black_joker": "🃏",
1201 | "flower_playing_cards": "🎴",
1202 | "mahjong": "🀄️",
1203 | "clock1": "🕐",
1204 | "clock2": "🕑",
1205 | "clock3": "🕒",
1206 | "clock4": "🕓",
1207 | "clock5": "🕔",
1208 | "clock6": "🕕",
1209 | "clock7": "🕖",
1210 | "clock8": "🕗",
1211 | "clock9": "🕘",
1212 | "clock10": "🕙",
1213 | "clock11": "🕚",
1214 | "clock12": "🕛",
1215 | "clock130": "🕜",
1216 | "clock230": "🕝",
1217 | "clock330": "🕞",
1218 | "clock430": "🕟",
1219 | "clock530": "🕠",
1220 | "clock630": "🕡",
1221 | "clock730": "🕢",
1222 | "clock830": "🕣",
1223 | "clock930": "🕤",
1224 | "clock1030": "🕥",
1225 | "clock1130": "🕦",
1226 | "clock1230": "🕧",
1227 | "white_flag": "🏳️",
1228 | "black_flag": "🏴",
1229 | "checkered_flag": "🏁",
1230 | "triangular_flag_on_post": "🚩",
1231 | "rainbow_flag": "🏳️🌈",
1232 | "afghanistan": "🇦🇫",
1233 | "aland_islands": "🇦🇽",
1234 | "albania": "🇦🇱",
1235 | "algeria": "🇩🇿",
1236 | "american_samoa": "🇦🇸",
1237 | "andorra": "🇦🇩",
1238 | "angola": "🇦🇴",
1239 | "anguilla": "🇦🇮",
1240 | "antarctica": "🇦🇶",
1241 | "antigua_barbuda": "🇦🇬",
1242 | "argentina": "🇦🇷",
1243 | "armenia": "🇦🇲",
1244 | "aruba": "🇦🇼",
1245 | "australia": "🇦🇺",
1246 | "austria": "🇦🇹",
1247 | "azerbaijan": "🇦🇿",
1248 | "bahamas": "🇧🇸",
1249 | "bahrain": "🇧🇭",
1250 | "bangladesh": "🇧🇩",
1251 | "barbados": "🇧🇧",
1252 | "belarus": "🇧🇾",
1253 | "belgium": "🇧🇪",
1254 | "belize": "🇧🇿",
1255 | "benin": "🇧🇯",
1256 | "bermuda": "🇧🇲",
1257 | "bhutan": "🇧🇹",
1258 | "bolivia": "🇧🇴",
1259 | "caribbean_netherlands": "🇧🇶",
1260 | "bosnia_herzegovina": "🇧🇦",
1261 | "botswana": "🇧🇼",
1262 | "brazil": "🇧🇷",
1263 | "british_indian_ocean_territory": "🇮🇴",
1264 | "british_virgin_islands": "🇻🇬",
1265 | "brunei": "🇧🇳",
1266 | "bulgaria": "🇧🇬",
1267 | "burkina_faso": "🇧🇫",
1268 | "burundi": "🇧🇮",
1269 | "cape_verde": "🇨🇻",
1270 | "cambodia": "🇰🇭",
1271 | "cameroon": "🇨🇲",
1272 | "canada": "🇨🇦",
1273 | "canary_islands": "🇮🇨",
1274 | "cayman_islands": "🇰🇾",
1275 | "central_african_republic": "🇨🇫",
1276 | "chad": "🇹🇩",
1277 | "chile": "🇨🇱",
1278 | "cn": "🇨🇳",
1279 | "christmas_island": "🇨🇽",
1280 | "cocos_islands": "🇨🇨",
1281 | "colombia": "🇨🇴",
1282 | "comoros": "🇰🇲",
1283 | "congo_brazzaville": "🇨🇬",
1284 | "congo_kinshasa": "🇨🇩",
1285 | "cook_islands": "🇨🇰",
1286 | "costa_rica": "🇨🇷",
1287 | "cote_divoire": "🇨🇮",
1288 | "croatia": "🇭🇷",
1289 | "cuba": "🇨🇺",
1290 | "curacao": "🇨🇼",
1291 | "cyprus": "🇨🇾",
1292 | "czech_republic": "🇨🇿",
1293 | "denmark": "🇩🇰",
1294 | "djibouti": "🇩🇯",
1295 | "dominica": "🇩🇲",
1296 | "dominican_republic": "🇩🇴",
1297 | "ecuador": "🇪🇨",
1298 | "egypt": "🇪🇬",
1299 | "el_salvador": "🇸🇻",
1300 | "equatorial_guinea": "🇬🇶",
1301 | "eritrea": "🇪🇷",
1302 | "estonia": "🇪🇪",
1303 | "ethiopia": "🇪🇹",
1304 | "eu": "🇪🇺",
1305 | "european_union": "🇪🇺",
1306 | "falkland_islands": "🇫🇰",
1307 | "faroe_islands": "🇫🇴",
1308 | "fiji": "🇫🇯",
1309 | "finland": "🇫🇮",
1310 | "fr": "🇫🇷",
1311 | "french_guiana": "🇬🇫",
1312 | "french_polynesia": "🇵🇫",
1313 | "french_southern_territories": "🇹🇫",
1314 | "gabon": "🇬🇦",
1315 | "gambia": "🇬🇲",
1316 | "georgia": "🇬🇪",
1317 | "de": "🇩🇪",
1318 | "ghana": "🇬🇭",
1319 | "gibraltar": "🇬🇮",
1320 | "greece": "🇬🇷",
1321 | "greenland": "🇬🇱",
1322 | "grenada": "🇬🇩",
1323 | "guadeloupe": "🇬🇵",
1324 | "guam": "🇬🇺",
1325 | "guatemala": "🇬🇹",
1326 | "guernsey": "🇬🇬",
1327 | "guinea": "🇬🇳",
1328 | "guinea_bissau": "🇬🇼",
1329 | "guyana": "🇬🇾",
1330 | "haiti": "🇭🇹",
1331 | "honduras": "🇭🇳",
1332 | "hong_kong": "🇭🇰",
1333 | "hungary": "🇭🇺",
1334 | "iceland": "🇮🇸",
1335 | "india": "🇮🇳",
1336 | "indonesia": "🇮🇩",
1337 | "iran": "🇮🇷",
1338 | "iraq": "🇮🇶",
1339 | "ireland": "🇮🇪",
1340 | "isle_of_man": "🇮🇲",
1341 | "israel": "🇮🇱",
1342 | "it": "🇮🇹",
1343 | "jamaica": "🇯🇲",
1344 | "jp": "🇯🇵",
1345 | "crossed_flags": "🎌",
1346 | "jersey": "🇯🇪",
1347 | "jordan": "🇯🇴",
1348 | "kazakhstan": "🇰🇿",
1349 | "kenya": "🇰🇪",
1350 | "kiribati": "🇰🇮",
1351 | "kosovo": "🇽🇰",
1352 | "kuwait": "🇰🇼",
1353 | "kyrgyzstan": "🇰🇬",
1354 | "laos": "🇱🇦",
1355 | "latvia": "🇱🇻",
1356 | "lebanon": "🇱🇧",
1357 | "lesotho": "🇱🇸",
1358 | "liberia": "🇱🇷",
1359 | "libya": "🇱🇾",
1360 | "liechtenstein": "🇱🇮",
1361 | "lithuania": "🇱🇹",
1362 | "luxembourg": "🇱🇺",
1363 | "macau": "🇲🇴",
1364 | "macedonia": "🇲🇰",
1365 | "madagascar": "🇲🇬",
1366 | "malawi": "🇲🇼",
1367 | "malaysia": "🇲🇾",
1368 | "maldives": "🇲🇻",
1369 | "mali": "🇲🇱",
1370 | "malta": "🇲🇹",
1371 | "marshall_islands": "🇲🇭",
1372 | "martinique": "🇲🇶",
1373 | "mauritania": "🇲🇷",
1374 | "mauritius": "🇲🇺",
1375 | "mayotte": "🇾🇹",
1376 | "mexico": "🇲🇽",
1377 | "micronesia": "🇫🇲",
1378 | "moldova": "🇲🇩",
1379 | "monaco": "🇲🇨",
1380 | "mongolia": "🇲🇳",
1381 | "montenegro": "🇲🇪",
1382 | "montserrat": "🇲🇸",
1383 | "morocco": "🇲🇦",
1384 | "mozambique": "🇲🇿",
1385 | "myanmar": "🇲🇲",
1386 | "namibia": "🇳🇦",
1387 | "nauru": "🇳🇷",
1388 | "nepal": "🇳🇵",
1389 | "netherlands": "🇳🇱",
1390 | "new_caledonia": "🇳🇨",
1391 | "new_zealand": "🇳🇿",
1392 | "nicaragua": "🇳🇮",
1393 | "niger": "🇳🇪",
1394 | "nigeria": "🇳🇬",
1395 | "niue": "🇳🇺",
1396 | "norfolk_island": "🇳🇫",
1397 | "northern_mariana_islands": "🇲🇵",
1398 | "north_korea": "🇰🇵",
1399 | "norway": "🇳🇴",
1400 | "oman": "🇴🇲",
1401 | "pakistan": "🇵🇰",
1402 | "palau": "🇵🇼",
1403 | "palestinian_territories": "🇵🇸",
1404 | "panama": "🇵🇦",
1405 | "papua_new_guinea": "🇵🇬",
1406 | "paraguay": "🇵🇾",
1407 | "peru": "🇵🇪",
1408 | "philippines": "🇵🇭",
1409 | "pitcairn_islands": "🇵🇳",
1410 | "poland": "🇵🇱",
1411 | "portugal": "🇵🇹",
1412 | "puerto_rico": "🇵🇷",
1413 | "qatar": "🇶🇦",
1414 | "reunion": "🇷🇪",
1415 | "romania": "🇷🇴",
1416 | "ru": "🇷🇺",
1417 | "rwanda": "🇷🇼",
1418 | "st_barthelemy": "🇧🇱",
1419 | "st_helena": "🇸🇭",
1420 | "st_kitts_nevis": "🇰🇳",
1421 | "st_lucia": "🇱🇨",
1422 | "st_pierre_miquelon": "🇵🇲",
1423 | "st_vincent_grenadines": "🇻🇨",
1424 | "samoa": "🇼🇸",
1425 | "san_marino": "🇸🇲",
1426 | "sao_tome_principe": "🇸🇹",
1427 | "saudi_arabia": "🇸🇦",
1428 | "senegal": "🇸🇳",
1429 | "serbia": "🇷🇸",
1430 | "seychelles": "🇸🇨",
1431 | "sierra_leone": "🇸🇱",
1432 | "singapore": "🇸🇬",
1433 | "sint_maarten": "🇸🇽",
1434 | "slovakia": "🇸🇰",
1435 | "slovenia": "🇸🇮",
1436 | "solomon_islands": "🇸🇧",
1437 | "somalia": "🇸🇴",
1438 | "south_africa": "🇿🇦",
1439 | "south_georgia_south_sandwich_islands": "🇬🇸",
1440 | "kr": "🇰🇷",
1441 | "south_sudan": "🇸🇸",
1442 | "es": "🇪🇸",
1443 | "sri_lanka": "🇱🇰",
1444 | "sudan": "🇸🇩",
1445 | "suriname": "🇸🇷",
1446 | "swaziland": "🇸🇿",
1447 | "sweden": "🇸🇪",
1448 | "switzerland": "🇨🇭",
1449 | "syria": "🇸🇾",
1450 | "taiwan": "🇹🇼",
1451 | "tajikistan": "🇹🇯",
1452 | "tanzania": "🇹🇿",
1453 | "thailand": "🇹🇭",
1454 | "timor_leste": "🇹🇱",
1455 | "togo": "🇹🇬",
1456 | "tokelau": "🇹🇰",
1457 | "tonga": "🇹🇴",
1458 | "trinidad_tobago": "🇹🇹",
1459 | "tunisia": "🇹🇳",
1460 | "tr": "🇹🇷",
1461 | "turkmenistan": "🇹🇲",
1462 | "turks_caicos_islands": "🇹🇨",
1463 | "tuvalu": "🇹🇻",
1464 | "uganda": "🇺🇬",
1465 | "ukraine": "🇺🇦",
1466 | "united_arab_emirates": "🇦🇪",
1467 | "gb": "🇬🇧",
1468 | "uk": "🇬🇧",
1469 | "us": "🇺🇸",
1470 | "us_virgin_islands": "🇻🇮",
1471 | "uruguay": "🇺🇾",
1472 | "uzbekistan": "🇺🇿",
1473 | "vanuatu": "🇻🇺",
1474 | "vatican_city": "🇻🇦",
1475 | "venezuela": "🇻🇪",
1476 | "vietnam": "🇻🇳",
1477 | "wallis_futuna": "🇼🇫",
1478 | "western_sahara": "🇪🇭",
1479 | "yemen": "🇾🇪",
1480 | "zambia": "🇿🇲",
1481 | "zimbabwe": "🇿🇼"
1482 | }
--------------------------------------------------------------------------------