├── .env
├── .gitignore
├── .vscode
│   └── setting.json
├── README.md
├── docs
│   ├── index.html
│   └── index.md
├── old
│   ├── Makefile
│   ├── README.md
│   ├── getImg.py
│   ├── progress.py
│   ├── response.js
│   ├── settings.py
│   ├── spider.py
│   └── spider_test.py
└── src
    ├── download_img.py
    ├── emoji.py
    ├── files
    │   ├── imgEmoji.html
    │   ├── unicode.json
    │   ├── unicodeDesc.html
    │   └── unicodeEmoji.html
    ├── main.py
    ├── my.py
    └── util
        ├── Bmob.py
        ├── config.py
        ├── mysql_helper.py
        └── table.py
/.env:
--------------------------------------------------------------------------------
PYTHONPATH=./src
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
__pycache__
weibo
profile.txt
.idea
*.log
imgs
--------------------------------------------------------------------------------
/.vscode/setting.json:
--------------------------------------------------------------------------------
// Place your settings in this file to overwrite default and user settings.

{
    "files.exclude": {
        "**/.git": true,          // this is a default value
        "**/.DS_Store": true,     // this is a default value

        "**/node_modules": true,  // this excludes all folders
                                  // named "node_modules" from
                                  // the explorer tree

        // alternative version
        "node_modules": true,     // this excludes the folder
                                  // only from the root of
                                  // your workspace
        "weibo": true,
        "log": true
    }
}
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Weibo API

Crawls weibo posts, comments, and emoji via breadth-first search over the following relation, and stores everything it finds in MySQL. The crawler is configured in `config.py`.



## Usage

1. Install MySQL. There is no need to create the tables yourself; the program does it automatically.
2. Fill in the configuration in [config.py](src/util/config.py).
3. Run `python src/main.py` to start crawling.

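After (or during) a run, you can sanity-check the crawl directly in MySQL. Below is a minimal sketch (not part of the project); it assumes the `seq2emoji` database and tables created by [mysql_helper.py](src/util/mysql_helper.py), and that `PYTHONPATH=./src` is set (see `.env`) so that `util.config` resolves.

```python
import mysql.connector

from util.config import config

# Connect with the same settings the crawler uses
db = mysql.connector.connect(**config['mysql']['CONNECTION'], database='seq2emoji')
cur = db.cursor()

# Users still queued vs. users already crawled
for table in ('Crawling', 'Crawled'):
    cur.execute(f'SELECT COUNT(*) FROM {table}')
    print(table, cur.fetchone()[0])

# A few stored posts with their image emoji (saved as a JSON array)
cur.execute('SELECT uid, mid, text, img_emoji FROM Weibo LIMIT 5')
for uid, mid, text, img_emoji in cur.fetchall():
    print(uid, mid, text[:30], img_emoji)
```
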
## References

1. [Requests sessions](https://2.python-requests.org//en/latest/user/advanced/#session-objects)
2. [Convert a cookie string into a dict in one line of code](https://foofish.net/extract_cookie.html)
3. [How to manually set cookies in a requests session](https://blog.csdn.net/mgxcool/article/details/52663382)
4. [Python + MySQL encoding issues](https://stackoverflow.com/a/20349552/8242705)
--------------------------------------------------------------------------------
/docs/index.html:
--------------------------------------------------------------------------------
People
😄😆😊😃☺️😏😍😘😚😳😌😆😁😉😜😝😀😗😙😛😴😟😦😧😮😬😕😯😑😒😅😓😥😩😔😞😖😨😰😣😢😭😂😲😱😫😠😡😤😪😋😷😎😵👿😈😐😶😇👽💛💙💜❤️💚💔💓💗💕💞💘💖✨⭐️🌟💫💥💥💢❗️❓❕❔💤💨💦🎶🎵🔥💩💩💩👍👍👎👎👌👊👊✊✌️👋✋✋👐☝️👇👈👉🙌🙏👆👏💪🤘🖕🚶🏃🏃👫👪👬👭💃👯🙆🙅💁🙋👰🙎🙍🙇💑💆💇💅👦👧👩👨👶👵👴👱👲👳👷👮👼👸😺😸😻😽😼🙀😿😹😾👹👺🙈🙉🙊💂💀🐾👄💋💧👂👀👃👅💌👤👥💬💭
Nature
☀️☔️☁️❄️⛄️⚡️🌀🌁🌊🐱🐶🐭🐹🐰🐺🐸🐯🐨🐻🐷🐽🐮🐗🐵🐒🐴🐎🐫🐑🐘🐼🐍🐦🐤🐥🐣🐔🐧🐢🐛🐝🐜🐞🐌🐙🐠🐟🐳🐋🐬🐄🐏🐀🐃🐅🐇🐉🐐🐓🐕🐖🐁🐂🐲🐡🐊🐪🐆🐈🐩🐾💐🌸🌷🍀🌹🌻🌺🍁🍃🍂🌿🍄🌵🌴🌲🌳🌰🌱🌼🌾🐚🌐🌞🌝🌚🌑🌒🌓🌔🌕🌖🌗🌘🌜🌛🌔🌍🌎🌏🌋🌌⛅️
Objects
🎍💝🎎🎒🎓🎏🎆🎇🎐🎑🎃👻🎅🎄🎁🔔🔕🎋🎉🎊🎈🔮💿📀💾📷📹🎥💻📺📱☎️☎️📞📟📠💽📼🔉🔈🔇📢📣⌛️⏳⏰⌚️📻📡➿🔍🔎🔓🔒🔏🔐🔑💡🔦🔆🔅🔌🔋📲✉️📫📮🛀🛁🚿🚽🔧🔩🔨💺💰💴💵💷💶💳💸📧📥📤✉️📨📯📪📬📭📦🚪🚬💣🔫🔪💊💉📄📃📑📊📈📉📜📋📆📅📇📁📂✂️📌📎✒️✏️📏📐📕📗📘📙📓📔📒📚🔖📛🔬🔭📰🏈🏀⚽️⚾️🎾🎱🏉🎳⛳️🚵🚴🏇🏂🏊🏄🎿♠️♥️♣️♦️💎💍🏆🎼🎹🎻👾🎮🃏🎴🎲🎯🀄️🎬📝📝📖🎨🎤🎧🎺🎷🎸👞👡👠💄👢👕👕👔👚👗🎽👖👘👙🎀🎩👑👒👞🌂💼👜👝👛👓🎣☕️🍵🍶🍼🍺🍻🍸🍹🍷🍴🍕🍔🍟🍗🍖🍝🍛🍤🍱🍣🍥🍙🍘🍚🍜🍲🍢🍡🥚🍞🍩🍮🍦🍨🍧🎂🍰🍪🍫🍬🍭🍯🍎🍏🍊🍋🍒🍇🍉🍓🍑🍈🍌🍐🍍🍠🍆🍅🌽
Places
🏠🏡🏫🏢🏣🏥🏦🏪🏩🏨💒⛪️🏬🏤🌇🌆🏯🏰⛺️🏭🗼🗾🗻🌄🌅🌠🗽🌉🎠🌈🎡⛲️🎢🚢🚤⛵️⛵️🚣⚓️🚀✈️🚁🚂🚊🚞🚲🚡🚟🚠🚜🚙🚘🚗🚗🚕🚖🚛🚌🚍🚨🚓🚔🚒🚑🚐🚚🚋🚉🚆🚅🚄🚈🚝🚃🚎🎫⛽️🚦🚥⚠️🚧🔰🏧🎰🚏💈♨️🏁🎌🏮🗿🎪🎭📍🚩🇯🇵🇰🇷🇨🇳🇺🇸🇫🇷🇪🇸🇮🇹🇷🇺🇬🇧🇬🇧🇩🇪
Symbols
1️⃣2️⃣3️⃣4️⃣5️⃣6️⃣7️⃣8️⃣9️⃣🔟🔢0️⃣#️⃣🔣◀️⬇️▶️⬅️🔠🔡🔤↙️↘️➡️⬆️↖️↗️⏬⏫🔽⤵️⤴️↩️↪️↔️↕️🔼🔃🔄⏪⏩ℹ️🆗🔀🔁🔂🆕🔝🆙🆒🆓🆖🎦🈁📶🈹🈴🈺🈯️🈷️🈶🈵🈚️🈸🈳🈲🈂️🚻🚹🚺🚼🚭🅿️♿️🚇🛄🉑🚾🚰🚮㊙️㊗️Ⓜ️🛂🛅🛃🉐🆑🆘🆔🚫🔞📵🚯🚱🚳🚷🚸⛔️✳️❇️✴️💟🆚📳📴💹💱♈️♉️♊️♋️♌️♍️♎️♏️♐️♑️♒️♓️⛎🔯❎🅰️🅱️🆎🅾️💠♻️🔚🔙🔛🔜🕐🕜🕙🕥🕚🕦🕛🕧🕑🕝🕒🕞🕓🕟🕔🕠🕕🕡🕖🕢🕗🕣🕘🕤💲©️®️™️❌❗️‼️⁉️⭕️✖️➕➖➗💮💯✔️☑️🔘🔗➰〰️〽️🔱▪️▫️◾️◽️◼️◻️⬛️⬜️✅🔲🔳⚫️⚪️🔴🔵🔷🔶🔹🔸🔺🔻
--------------------------------------------------------------------------------
/src/main.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

import os, logging, time, signal, sys, requests, traceback, json
from util.mysql_helper import *
from collections import deque
from lxml import etree

# Used to look up a user's containerid
INFO_URL = 'https://m.weibo.cn/api/container/getIndex?type=uid&value={}'
# Note that the weibo containerid differs from user to user
WEIBO_URL = 'https://m.weibo.cn/api/container/getIndex?containerid={}&page={}'
LONG_WEIBO_URL = 'https://m.weibo.cn/statuses/extend?id={}'
COMMENT_URL = 'https://m.weibo.cn/api/comments/show?id={}&page={}'
FOLLOWING_URL = 'https://m.weibo.cn/api/container/getIndex?containerid=231051_-_followers_-_{}&page={}'


class WBSpider():

    def init_logging(self, name='crawling', log_level=logging.INFO):
        file_dir = os.path.dirname(os.path.realpath('__file__')) + "/log"
        # Create the log directory automatically if it does not exist
        if not os.path.isdir(file_dir):
            os.makedirs(file_dir)
        fileh = logging.FileHandler(file_dir + f'/{name}-{logging.getLevelName(log_level)}.log', 'w', encoding='utf-8')
        formatter = logging.Formatter("%(asctime)s;%(levelname)s;%(message)s",
                                      "%Y-%m-%d %H:%M:%S")
        fileh.setFormatter(formatter)

        log = logging.getLogger()  # root logger
        for hdlr in log.handlers[:]:  # remove all old handlers
            log.removeHandler(hdlr)
        log.addHandler(fileh)  # set the new handler
        log.setLevel(log_level)

        return fileh

    def fetch_table(self, table='Crawling'):
        self.MYCURSOR.execute(f'SELECT * FROM {table}')
        columns = [col[0] for col in self.MYCURSOR.description]
        return [dict(zip(columns, row)) for row in self.MYCURSOR.fetchall()]

    def sel_from_table(self, table, key, value):
        self.MYCURSOR.execute(f"SELECT * FROM {table} WHERE {key} = '{value}'")
        columns = [col[0] for col in self.MYCURSOR.description]
        return [dict(zip(columns, row)) for row in self.MYCURSOR.fetchall()]

    def del_from_table(self, table, key, value):
        self.MYCURSOR.execute(f"DELETE FROM {table} WHERE {key} = '{value}'")
        self.MYDB.commit()

    def ins_to_table(self, table, data_dict):
        try:
            columns = ', '.join(data_dict.keys())
            placeholders = ', '.join(['%s'] * len(data_dict))
            sql = "INSERT INTO %s ( %s ) VALUES ( %s )" % (table, columns, placeholders)
            # Lists (e.g. img_emoji) are serialized to JSON strings before insertion
            for key in data_dict.keys():
                if isinstance(data_dict[key], list):
                    data_dict[key] = json.dumps(data_dict[key])
            self.MYCURSOR.execute(sql, list(data_dict.values()))
            self.MYDB.commit()
        except mysql.connector.errors.IntegrityError:
            # Skip duplicate inserts
            pass

    def init_crawl(self):
        # Queue of users waiting to be crawled (breadth-first search)
        self.crawling = deque(self.fetch_table())
        self.crawled = deque(self.fetch_table('Crawled'))

    def save_crawl_to_bmob(self):
        for crawling_item in self.crawling:
            crawling_item.save()
        for crawled_item in self.crawled:
            crawled_item.save()

    def init_session(self):
        self.session = requests.Session()
        cookies_dict = dict([l.split("=", 1) for l in config['weibo']['COOKIE'].split("; ")])
        # https://blog.csdn.net/mgxcool/article/details/52663382
        requests.utils.add_dict_to_cookiejar(self.session.cookies, cookies_dict)

    def init_mysql(self):
        create_db_if_not_exists()
        (self.MYDB, self.MYCURSOR) = create_table_if_not_exists()

    def __init__(self):
        self.init_logging()
        logging.info('Initializing the database...')
        self.init_mysql()
        logging.info('Initializing the crawl queue...')
        self.init_crawl()
        self.init_session()

    def get_data(self, url):
        # Wait a few seconds before every request to avoid being blocked for speed
        time.sleep(config['crawl']['PERIOD'])
        res = self.session.get(url).json()
        # The API answers with this message when we are rate limited
        if 'msg' in res and res['msg'] == '请求过于频繁,歇歇吧':
            logging.warning(f"Requests are too frequent, waiting {config['crawl']['FORBID_PAUSE']} seconds")
            time.sleep(config['crawl']['FORBID_PAUSE'])
            logging.warning('Done waiting, retrying the request')
            return self.get_data(url)
        return res

    def crawl_user_following(self, uid):
        """
        Return all of this user's followings as {'uid': ..., 'uname': ...} dicts.
        """
        try:
            result = []
            cur_page = 1
            while True:
                logging.info(f'Crawling page {cur_page} of the followings of user {uid}')
                # https://m.weibo.cn/api/container/getIndex?containerid=231051_-_followers_-_1669879400&page=0
                url = FOLLOWING_URL.format(uid, cur_page)
                data = self.get_data(url)
                if len(data['data']['cards']) == 0:
                    logging.info(f'Finished crawling the followings of user {uid}')
                    logging.info(f'{len(result)} new followings will be added to the queue')
                    return result

                for card in data['data']['cards']:
                    for card_group_item in card['card_group']:
                        # Only card_type 10 entries are actual followings
                        if card_group_item['card_type'] != 10:
                            continue
                        result.append({'uid': card_group_item['user']['id'], 'uname': card_group_item['user']['screen_name']})
                cur_page += 1
        except:
            logging.error('Failed to crawl followings')
            logging.error(traceback.format_exc())
            return []

    def get_weibo_containerid(self, uid):
        data = None  # keep data defined for the error handler below
        try:
            # https://m.weibo.cn/api/container/getIndex?type=uid&value=1669879400
            url = INFO_URL.format(uid)
            data = self.get_data(url)
            return data['data']['tabsInfo']['tabs'][1]['containerid']
        except:
            logging.error('Failed to fetch containerid')
            logging.error(traceback.format_exc())
            logging.error(data)

    def crawl_user_weibo(self, uid):
        """
        Crawl all of this user's weibo posts and store them in the Weibo table.
        """
        data = None
        try:
            containerid = self.get_weibo_containerid(uid)
            cur_page = 1
            while True:
                logging.info(f'Crawling page {cur_page} of the weibo of user {uid}')
                # https://m.weibo.cn/api/container/getIndex?containerid=1076031669879400&page=0
                url = WEIBO_URL.format(containerid, cur_page)
                data = self.get_data(url)
                if len(data['data']['cards']) == 0:
                    logging.info(f'Finished crawling user {uid}')
                    return

                for card in data['data']['cards']:
                    # Ignore ads and other kinds of cards
                    if card["card_type"] != 9:
                        continue
                    mblog = card["mblog"]
                    # Skip retweets
                    if "retweeted_status" in mblog:
                        continue

                    selector = etree.HTML(mblog["text"])
                    a_text = selector.xpath("//a/text()")
                    # Convert the HTML to plain text
                    # See https://www.zybuluo.com/Alston/note/778377
                    text = etree.tostring(selector, method="text", encoding="UTF-8").decode('utf-8')
                    img_emoji = selector.xpath("//span/img/@alt")

                    weibo = {'uid': uid, 'text': text, 'mid': mblog['mid'], 'img_emoji': img_emoji}
                    self.ins_to_table('Weibo', weibo)

                    # Crawl this post's comments
                    self.crawl_weibo_comments(mblog['mid'])

                cur_page += 1
        except:
            logging.error('Failed to crawl weibo')
            logging.error(traceback.format_exc())
            logging.error(data)

    def crawl_weibo_comments(self, mid, max_pages=10):
        """
        Crawl up to max_pages (default 10) pages of comments of one weibo post and store
        them in the Comment table, with mid (the post's unique id) set to the given mid.
        """
        data = None
        try:
            cur_page = 1
            for _ in range(max_pages):
                logging.info(f'Crawling page {cur_page} of the comments of post {mid}')
                # https://m.weibo.cn/api/comments/show?id=4384122253963002&page=0
                url = COMMENT_URL.format(mid, cur_page)
                data = self.get_data(url)
                # '暂无数据' ("no data yet") means there are no more comment pages
                if data.get('msg') == '暂无数据':
                    break
                for comment in data['data']['data']:
                    selector = etree.HTML(comment["text"])
                    cid = comment["id"]
                    text = etree.tostring(selector, method="text", encoding="UTF-8").decode('utf-8')
                    img_emoji = selector.xpath("//span/img/@alt")

                    comment = {'cid': cid, 'mid': mid, 'text': text, 'img_emoji': img_emoji}
                    self.ins_to_table('Comment', comment)

                cur_page += 1
            logging.info(f'Finished crawling post {mid}')
        except:
            logging.error('Failed to crawl comments')
            logging.error(traceback.format_exc())
            logging.error(data)

    def crawl(self, uid):
        """
        Crawl the user identified by uid.
        When finished, return all of the user's followings as {'uid': ..., 'uname': ...} dicts.
        """
        self.crawl_user_weibo(uid)
        return self.crawl_user_following(uid)

    def startBFS(self):
        """
        Start crawling (breadth-first search).
        """
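        # The Crawling table/queue is the BFS frontier; the Crawled table is the
        # visited set, checked below before a following is enqueued. Both live in
        # MySQL, so an interrupted crawl resumes where it left off.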
        # In theory this loop terminates; in practice it never does
        while len(self.crawling) > 0:
            crawling_user = self.crawling.popleft()
            adj_arr = self.crawl(crawling_user['uid'])
            if adj_arr is None:
                logging.error('Abnormal termination')
                sys.exit(-1)
            self.del_from_table('Crawling', 'uid', crawling_user['uid'])
            logging.info(f"{crawling_user['uid']}-{crawling_user['uname']} removed from the Crawling queue and database")
            self.ins_to_table('Crawled', crawling_user)
            logging.info(f'{crawling_user["uid"]}-{crawling_user["uname"]} added to the Crawled queue and database')
            # Followings that have not been crawled yet
            for v in adj_arr:
                if len(self.sel_from_table('Crawled', 'uid', v['uid'])) == 0:
                    crawling_user_new = {'uid': v['uid'], 'uname': v['uname']}
                    self.ins_to_table('Crawling', crawling_user_new)
                    self.crawling.append(crawling_user_new)
                    logging.info(f"{v['uid']}-{v['uname']} added to the Crawling queue and database")


def signal_handler(sig, frame):
    print('You pressed Ctrl+C!')
    sys.exit(0)


if __name__ == "__main__":
    signal.signal(signal.SIGINT, signal_handler)

    spider = WBSpider()
    spider.startBFS()
--------------------------------------------------------------------------------
/src/my.py:
--------------------------------------------------------------------------------
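# One-off helper: create the seq2emoji database and its tables without starting a crawl.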
from util.mysql_helper import *

create_db_if_not_exists()
create_table_if_not_exists()
--------------------------------------------------------------------------------
/src/util/Bmob.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
'''
Created on 2015-07-02

@author: RobinTang

https://github.com/sintrb/Bmob-Py

'''
import json
import copy
import functools
import requests
from urllib import parse
from .config import *


def _urljoin(func):
    @functools.wraps(func)
    def _wrapper(self, resource_path, *args, **kwargs):
        url = self.apiurl + '/' + resource_path
        return func(self, url, *args, **kwargs)
    return _wrapper


def urlencode(params):
    if isinstance(params, dict):
        return parse.urlencode(params)
    elif isinstance(params, list):
        return parse.quote(''.join(params))
    else:
        return parse.quote(params)


class BmobSDK(object):
    '''
    BmobSDK is created with an Application ID and REST API Key. You can set up a shared default Application with the BmobSDK.setup() method.
    '''
    context = None

    def __init__(self, appid, restkey, apiurl='http://api.bmob.cn/1/classes'):
        super(BmobSDK, self).__init__()
        self.appid = appid
        self.restkey = restkey
        self.apiurl = apiurl
        self._http_headers = {
            "X-Bmob-Application-Id": self.appid,
            "X-Bmob-REST-API-Key": self.restkey,
            "Content-Type": "application/json"}
        # https://stackoverflow.com/questions/24873927/python-requests-module-and-connection-reuse
        # Reuse connections to avoid exceptions caused by too many open connections
        # https://2.python-requests.org//en/latest/user/advanced/#session-objects
        self.session = requests.Session()

    @_urljoin
    def get(self, url):
        return self.session.get(url, headers=self._http_headers)

    @_urljoin
    def post(self, url, **kwargs):
        return self.session.post(url, json=kwargs.get('data'), headers=self._http_headers)

    @_urljoin
    def put(self, url, **kwargs):
        return self.session.put(url, json=kwargs.get('data'), headers=self._http_headers)

    @_urljoin
    def delete(self, url, **kwargs):
        return self.session.delete(url, headers=self._http_headers)

    @staticmethod
    def setup(appid, restkey):
        BmobSDK.context = BmobSDK(appid, restkey)


class Query(object):
    '''
    Bmob Query
    '''
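    # Usage sketch (assuming BmobSDK.setup(appid, restkey) has been called and a
    # BmobModel subclass such as Weibo from util/table.py exists):
    #   q = Query(Weibo).w_eq('uid', '123').limit(10)
    #   for item in q:        # iterating triggers exec_query()
    #       print(item.text)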

    def __init__(self, clz, context=None):
        super(Query, self).__init__()
        if not context:
            context = BmobSDK.context
        if not context:
            raise BaseException("No BmobSDK context set up!")
        self.context = context
        self.clz = clz
        self.q = {}
        self.w = {}  # where
        self.items = None

    def copy(self):
        q = Query(self.clz, self.context)
        q.q = copy.deepcopy(self.q)
        q.w = copy.deepcopy(self.w)
        return q

    def get_urlencode(self):
        if self.w:
            self.q['where'] = json.dumps(self.w)
        elif 'where' in self.q:
            del self.q['where']
        return urlencode(self.q)

    def order(self, o):
        self.q['order'] = o
        return self.copy()

    def limit(self, l):
        self.q['limit'] = l
        return self.copy()

    def skip(self, s):
        self.q['skip'] = s
        return self.copy()

    def count(self):
        if self.items is not None:
            return len(self.items)
        else:
            self.limit(0)
            self.q['count'] = 1
            path = '/'.join([self.clz.__name__, '?' + self.get_urlencode()])
            return self.context.get(path).json()['count']

    def get_kw(self, k):
        if k in self.w:
            return self.w[k]
        else:
            self.w[k] = {}
            return self.w[k]

    def w_eq(self, k, v):
        '''equal'''
        self.w[k] = v
        return self.copy()

    def w_lt(self, k, v):
        '''less than'''
        self.get_kw(k)['$lt'] = v
        return self.copy()

    def w_lte(self, k, v):
        '''less than or equal'''
        self.get_kw(k)['$lte'] = v
        return self.copy()

    def w_gt(self, k, v):
        '''greater than'''
        self.get_kw(k)['$gt'] = v
        return self.copy()

    def w_gte(self, k, v):
        '''greater than or equal'''
        self.get_kw(k)['$gte'] = v
        return self.copy()

    def w_ne(self, k, v):
        '''not equal'''
        self.get_kw(k)['$ne'] = v
        return self.copy()

    def w_in(self, k, v):
        '''in'''
        self.get_kw(k)['$in'] = v
        return self.copy()

    def w_nin(self, k, v):
        '''not in'''
        self.get_kw(k)['$nin'] = v
        return self.copy()

    def w_exists(self, k, v):
        self.get_kw(k)['$exists'] = v
        return self.copy()

    def w_select(self, k, v):
        self.get_kw(k)['$select'] = v
        return self.copy()

    def w_dontSelect(self, k, v):
        self.get_kw(k)['$dontSelect'] = v
        return self.copy()

    def w_all(self, k, v):
        self.get_kw(k)['$all'] = v
        return self.copy()

    def w_regex(self, k, v):
        self.get_kw(k)['$regex'] = v
        return self.copy()

    def exec_query(self):
        rs = []
        path = '/'.join([self.clz.__name__, '?' + self.get_urlencode()])
        for r in self.context.get(path).json()['results']:
            rs.append(self.clz(**r))
        self.items = rs
        return self.items

    def first(self):
        q = self.copy()
        q.limit(1)
        rs = q.exec_query()
        return len(rs) and rs[0] or None

    def __getslice__(self, s, e):
        if self.items is None:
            self.exec_query()
        return self.items[s:e]

    def __iter__(self):
        if self.items is None:
            self.exec_query()
        return iter(self.items)

    def __getitem__(self, k):
        if self.items is None:
            self.exec_query()
        return self.items.__getitem__(k)

    def __len__(self):
        return self.count()


class BmobModel(object):
    '''
    Basic Bmob model; all other Bmob models must inherit from this class.
    '''

    def __init__(self, context=None, objectId=None, **kwargs):
        super(BmobModel, self).__init__()
        # check objectId
        if isinstance(context, str):
            objectId = context
            context = None

        if not context:
            context = BmobSDK.context
        if not context:
            raise BaseException("No BmobSDK context set up!")
        self.context = context
        self.objectId = objectId
        if self.objectId:
            # get object by id
            path = '/'.join([self.get_modelname(), self.objectId])
            for k, v in self.context.get(path).json().items():
                setattr(self, k, v)
        else:
            for k, v in kwargs.items():
                setattr(self, k, v)

    def get_attrs(self):
        return [k for k in type(self).__dict__ if not k.startswith('__')]

    def get_dict(self):
        ks = self.get_attrs()
        clz = type(self)
        tps = [type(v) for v in [1, 1, 1.0, '1', (1, 2), [1, 2], {'1': '1'}, {1, 2}]]
        return dict([(k, type(getattr(clz, k))(getattr(self, k)))
                     for k in ks if type(getattr(clz, k)) in tps])

    def get_modelname(self):
        return type(self).__name__

    def save(self):
        data = self.get_dict()
        if self.objectId:
            path = '/'.join([self.get_modelname(), self.objectId])
            for k, v in self.context.put(path, data=data).json().items():
                setattr(self, k, v)
        else:
            path = self.get_modelname()
            for k, v in self.context.post(path, data=data).json().items():
                setattr(self, k, v)

    def delete(self):
        if self.objectId:
            path = '/'.join([self.get_modelname(), self.objectId])
            res = self.context.delete(path).json()['msg'] == 'ok'
            if res:
                self.objectId = None
            return res
        else:
            return True

    def query(self):
        return Query(type(self))
--------------------------------------------------------------------------------
/src/util/config.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

config = {
    # Bmob settings
    'bmob': {
        'APP_ID': '366372322020724a39d8de5ccd61eeaa',
        'REST_API_KEY': '40de9f3e91287703e695fe1f6b94393a',
    },
    # Weibo settings
    'weibo': {
        # How to get the Cookie: go to m.weibo.cn, open the full text of a post with
        # many comments, and scroll down a few pages; the request headers shown in
        # Chrome's Network panel will then contain the Cookie.
        # Note: m.weibo.cn is special in that viewing posts does not require logging
        # in, but viewing comments does.
        # For example, open https://m.weibo.cn/detail/4389138709375153 directly and
        # scroll through a few comments; the Cookie appears in the request headers
        # under the Network panel's XHR tab.
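        # The cookie string below is consumed by init_session() in src/main.py, which
        # splits it into a dict and attaches it to the requests session:
        #   cookies_dict = dict(l.split('=', 1) for l in COOKIE.split('; '))
        #   requests.utils.add_dict_to_cookiejar(session.cookies, cookies_dict)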
        'COOKIE': 'ALF=1564734503; SCF=AuUY2ywPv1KKDsxqBgngDXYn7XTsKn_5p4iBblRihSxO8mUlZ5DB13iaxpPOY50QQzi_qq8HXRkR0NEl6MjJ-Ts.; SUB=_2A25wGOOGDeRhGeFP4lcU9SfJzD-IHXVT4o3OrDV6PUJbktBeLW_RkW1NQO_UfEy6P_rwgaJHDE-0R3sOldFws7cD; SUBP=0033WrSXqPxfM725Ws9jqgMF55529P9D9W5aERPgdESA6l4AaFTr3jGy5JpX5K-hUgL.FoMp1K-fSK.fS0e2dJLoIp7LxKML1KBLBKnLxKqL1hnLBoM7SKnRe0eRe0z0; SUHB=0laVFNbqkGjoCM; _T_WM=68656738488; WEIBOCN_FROM=1110106030; MLOGIN=1; XSRF-TOKEN=aa01d1; M_WEIBOCN_PARAMS=luicode%3D10000011%26lfid%3D1076031669879400%26uicode%3D20000061%26fid%3D4389138709375153%26oid%3D4389138709375153'
    },
    'mysql': {
        'CONNECTION': {
            'host': "localhost",
            'user': 'upupming',
            'charset': 'utf8mb4'
        }
    },
    'crawl': {
        # Used to seed the crawl queue
        'START_USER': '2803301701',
        # Wait PERIOD seconds between any two requests
        'PERIOD': 4,
        # After getting blocked, wait 5 minutes before requesting again
        'FORBID_PAUSE': 300
    }
}
--------------------------------------------------------------------------------
/src/util/mysql_helper.py:
--------------------------------------------------------------------------------
import mysql.connector
from .config import *

MYDB = mysql.connector.connect(**config['mysql']['CONNECTION'])
MYCURSOR = MYDB.cursor()


def create_db_if_not_exists():
    MYCURSOR.execute('create database if not exists seq2emoji')


def create_table_if_not_exists():
    """
    Return (MYDB, MYCURSOR) connected to the database that holds the tables.
    """
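    # Table overview:
    #   Crawling - BFS frontier: users discovered but not yet crawled
    #   Crawled  - users whose posts and comments have been stored
    #   Weibo    - one row per original (non-retweet) post
    #   Comment  - comments, keyed by cid and linked to a post via mid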
    global MYDB, MYCURSOR
    MYDB = mysql.connector.connect(**config['mysql']['CONNECTION'], database='seq2emoji')
    MYCURSOR = MYDB.cursor()

    # Crawling
    MYCURSOR.execute("""
        create table if not exists Crawling (
            uid varchar(255) not null unique,
            uname varchar(255) CHARACTER SET utf8mb4 collate utf8mb4_unicode_520_ci,
            primary key (uid)
        )
    """)
    MYCURSOR.execute("SELECT * FROM Crawling")
    myresult = MYCURSOR.fetchall()
    # If no users are queued for crawling, seed the queue with the start user
    if len(myresult) == 0:
        MYCURSOR.execute('insert into Crawling (uid, uname) values (%s, %s)', (config['crawl']['START_USER'], None))
        MYDB.commit()
    # Crawled
    MYCURSOR.execute("""
        create table if not exists Crawled (
            uid varchar(255) not null unique,
            uname varchar(255) CHARACTER SET utf8mb4 collate utf8mb4_unicode_520_ci,
            primary key (uid)
        )
    """)
    # Weibo
    MYCURSOR.execute("""
        create table if not exists Weibo (
            uid varchar(255),
            mid varchar(255) not null unique,
            text text CHARACTER SET utf8mb4 collate utf8mb4_unicode_520_ci,
            img_emoji json,
            primary key (mid)
        )
    """)
    # Comment
    MYCURSOR.execute("""
        create table if not exists Comment (
            mid varchar(255) not null,
            cid varchar(255) not null unique,
            text text CHARACTER SET utf8mb4 collate utf8mb4_unicode_520_ci,
            img_emoji json,
            primary key (cid)
        )
    """)

    return (MYDB, MYCURSOR)
--------------------------------------------------------------------------------
/src/util/table.py:
--------------------------------------------------------------------------------
from .Bmob import BmobSDK, BmobModel

class Emoji(BmobModel):
    desc = ''
    content = ''

class Crawling(BmobModel):
    uid = 0
    uname = ''

class Crawled(BmobModel):
    uid = 0
    uname = ''

class Weibo(BmobModel):
    mid = ''
    text = ''
    img_emoji = []

class Comment(BmobModel):
    cid = ''
    mid = ''
    text = ''
    img_emoji = []
--------------------------------------------------------------------------------