├── Dockerfile
├── README.md
├── conf.py
├── requirements.txt
├── 工商信息查询.py
└── 看准-企业工商信息查询.py


/Dockerfile:
--------------------------------------------------------------------------------
 1 | # 基于镜像基础
 2 | FROM python:3.10.1
 3 | 
 4 | # 设置时区
 5 | ENV TZ Asia/Shanghai
 6 | 
 7 | # 设置代码文件夹工作目录 /app
 8 | WORKDIR /app
 9 | 
10 | # 复制当前代码文件到容器中 /app
11 | ADD . /app
12 | 
13 | # 安装所需的包
14 | RUN pip install -r requirements.txt -i https://pypi.doubanio.com/simple/
15 | 
16 | CMD ["gunicorn", "-c", "conf.py", "看准-企业工商信息查询:app"]
17 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # 企业工商信息查询接口
 2 | 
 3 | 企业工商信息接口(包含天眼查、企查查、爱企查、国家企业公示系统平台、看准)
 4 | 
 5 | 接口文档(http://127.0.0.1:8081/docs)
 6 | 
 7 | tip:代理设置(158行更换)
 8 | 
 9 | 项目运行  
10 |   
11 | `pip install -r requirements.txt -i https://pypi.doubanio.com/simple/`
12 |   
13 | `uvicorn 工商信息查询:app --host 0.0.0.0 --port 8081 --reload`
14 | 
15 | docker 运行  
16 |   
17 |  `docker build -t businessinfo https://github.com/Litre-WU/businessInfo-api.git `  
18 |    
19 |  `docker run --name businessInfo -d -p 8081:8081 businessinfo`
20 | 


--------------------------------------------------------------------------------
/conf.py:
--------------------------------------------------------------------------------
 1 | # vim: set fileencoding:utf-8
 2 | # -*- coding: utf-8 -*-
 3 | # Author: Litre WU
 4 | # E-mail: litre-wu@tutanota.com
 5 | # Software: PyCharm
 6 | # File: gunicorn.py
 7 | # Time: 4月 23, 2021
 8 | import logging
 9 | import logging.handlers
10 | from logging.handlers import WatchedFileHandler
11 | import os
12 | import multiprocessing
13 | 
# chdir = '/app'  # change into this directory before loading the application

proc_name = 'businessInfo'  # process name

bind = '0.0.0.0:8081'  # IP address and port to bind

backlog = 512  # listen queue size

timeout = 10  # timeout

# worker_class = 'gevent' # the default is sync mode
# worker_class = 'uvicorn.workers.UvicornWorker'  # use the uvicorn worker
worker_class = 'uvicorn.workers.UvicornH11Worker'  # use the pure-Python mode

# workers = multiprocessing.cpu_count() * 2 + 1  # number of worker processes
# workers = multiprocessing.cpu_count() * 2 + 1  # number of worker processes
workers = 4  # number of worker processes

threads = 4  # number of threads started by each process

# daemon = True  # run as a daemon

reload = True  # reload automatically

worker_connections = 2000  # maximum number of concurrent connections

loglevel = 'info'  # log level; this applies to the error log, the access log level cannot be set

# accesslog = "/businessInfo/logs/demo_access.log"  # access log file, "-" means standard output

# errorlog = "/businessInfo/logs/demo_err.log"  # error log file, "-" means standard output

# access_log_format = '%(h)s %(l)s %(u)s %(t)s'
47 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | aiohttp
 2 | fastapi
 3 | user-agent
 4 | lxml
 5 | uvicorn
 6 | gunicorn
 7 | pandas
 8 | python-multipart
 9 | brotlipy
10 | loguru
11 | boltons
12 | beautifulsoup4
13 | 


--------------------------------------------------------------------------------
/工商信息查询.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | # Author: Litre WU
  3 | # E-mail: litre-wu@tutanota.com
  4 | # Software: PyCharm
  5 | # File: 工商信息查询.py
  6 | # Time: 4月 21, 2021
  7 | import asyncio
  8 | from typing import Optional, List
  9 | from fastapi import FastAPI, Header, Cookie, Depends, BackgroundTasks
 10 | from starlette.requests import Request
 11 | from pydantic import BaseModel, Field
 12 | from fastapi.responses import JSONResponse
 13 | import aiohttp
 14 | from user_agent import generate_user_agent
 15 | from lxml import etree
 16 | import pandas as pd
 17 | import json
 18 | import time
 19 | from random import randint, sample
 20 | import os
 21 | from json import load, dump
 22 | import socket
 23 | from sys import platform
 24 | from functools import lru_cache
 25 | from loguru import logger
 26 | from boltons.cacheutils import LRI, LRU
 27 | from hashlib import md5
 28 | 
# Two in-memory caches from boltons, each limited to 100 entries.
# NOTE(review): the name `lru_cache` rebinds the `functools.lru_cache` imported above.
lri_cache = LRI(max_size=100)
lru_cache = LRU(max_size=100)

# Log to "<this file's name without .py>.log": rotate at 200 MB, zip rotated files, keep them 7 days.
logger.add(f'{os.path.basename(__file__)[:-3]}.log', rotation='200 MB', compression='zip', enqueue=True, serialize=False, encoding='utf-8', retention='7 days')


# Address the local host name resolves to; only used to build the docs URL below.
host = socket.gethostbyname(socket.gethostname())

# Windows only: install a proactor event loop, then set the selector event-loop policy.
# NOTE(review): the two calls select different loop implementations - confirm which one is intended.
if platform == "win32":
    asyncio.set_event_loop(asyncio.ProactorEventLoop())
    asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())

# Tag metadata handed to FastAPI as `openapi_tags`.
tags_metadata = [
    {
        "name": "企业工商信息查询接口",
        "description": "企业工商信息查询(天眼查、企查查、爱企查、国家企业公示系统)",
        "externalDocs": {
            "description": "More",
            "url": f"http://{host}/docs",
        },
    },
]

# Contact details handed to FastAPI as `contact`.
contact = {
    "name": "Litre",
    "url": "http://121.37.209.113",
    "email": "litre-wu@tutanota.com",
}

# The application object; its OpenAPI schema URL is /api/v1/api.json.
app = FastAPI(openapi_url="/api/v1/api.json", title="企业工商信息查询接口", contact=contact, openapi_tags=tags_metadata)
 59 | 
 60 | 
 61 | # 日志
 62 | async def log(request, **kwargs):
 63 |     ritems = dict(request.items())
 64 |     if not kwargs: kwargs = ""
 65 |     log_info = f'{ritems["client"][0]} {ritems["method"]} {ritems["path"]} {ritems["type"]}/{ritems["http_version"]} {kwargs}'
 66 |     logger.info(log_info)
 67 |     
 68 |     
 69 | # 首页
 70 | @app.get("/", tags=["首页"])
 71 | async def index(request: Request, user_agent: Optional[str] = Header(None), x_token: List[str] = Header(None), ):
 72 |     result = {
 73 |         "code": 200,
 74 |         "msg": "来了！老弟",
 75 |         "result": "你看这个面它又长又宽，就像这个碗它又大又圆",
 76 |         "info": {
 77 |             "openapi_url": "/api/v1/openapi.json",
 78 |             "ip": request.client.host,
 79 |             "x-token": x_token,
 80 |             "user-agent": user_agent,
 81 |             "headers": dict(request.headers)
 82 |         }
 83 |     }
 84 |     return JSONResponse(result)
 85 | 
 86 | 
class Qcc(BaseModel):
    # Request body of the lookup endpoint. Documented with comments only,
    # because a class docstring would become part of the published schema.
    # Search keyword; the example value is a company name.
    key: str = Field(..., example='哔哩哔哩')
    # Unified social credit code; the example text marks it as "not used for now".
    creditCode: str = Field(..., example='统一社会信用代码(暂不使用)')
 90 | 
 91 | 
 92 | @app.post("/", tags=["企业工商信息查询接口"])
 93 | async def api(data: Qcc, request: Request, background_tasks: BackgroundTasks, x_token: List[str] = Header(None),
 94 |               user_agent: Optional[str] = Header(None)):
 95 |     kwargs = data.dict()
 96 |     await log(request, **kwargs)
 97 |     key = md5(str(kwargs).encode()).hexdigest()
 98 |     if lru_cache.get(key): return lru_cache[key]
 99 |     result = await query(**kwargs)
100 |     if result: lru_cache[key] = result
101 |     return JSONResponse(result)
102 | 
103 | 
# Shared HTTP request helper
async def pub_req(**kwargs):
    """Send one HTTP request with aiohttp and return the response body.

    Keyword Args:
        url: Target URL. When missing or empty, ``None`` is returned at once.
        method: HTTP method, ``"GET"`` by default.
        params: Query-string parameters.
        data: Request body.
        headers: Extra headers; they override the generated defaults.
        proxy: Proxy URL.
        proxy_user, proxy_pass: Proxy credentials, sent only when a user
            name is given.
        timeout: Total timeout of the request in seconds (default 5), or an
            ``aiohttp.ClientTimeout``.
        retry: Number of failed attempts already made (default 0).

    Returns:
        The raw body (bytes) of a 200 response, or ``None`` when the URL is
        missing or the request still fails once ``retry`` reaches 2.
    """
    if not kwargs.get("url", ""):
        return None

    # A bare number as per-request timeout is deprecated in aiohttp; wrap it.
    timeout = kwargs.get("timeout", 5)
    if not isinstance(timeout, aiohttp.ClientTimeout):
        timeout = aiohttp.ClientTimeout(total=timeout)

    # Do not send a blank Proxy-Authorization header when no credentials exist.
    proxy_user = kwargs.get("proxy_user", "")
    proxy_auth = aiohttp.BasicAuth(proxy_user, kwargs.get("proxy_pass", "")) if proxy_user else None

    attempt = kwargs.get("retry", 0)
    while True:
        # `ip` is not defined in this function. Use a module-level `ip` when
        # the file provides one, otherwise a random address, so building the
        # headers can no longer fail with NameError.
        fake_ip = globals().get("ip") or ".".join(str(randint(1, 254)) for _ in range(4))
        # Headers are rebuilt per attempt so each try gets a new User-Agent.
        headers = {**{
            "X-Forwarded-For": fake_ip,
            "X-Forwarded": fake_ip,
            "Forwarded-For": fake_ip,
            "Forwarded": fake_ip,
            "X-Forwarded-Proto": fake_ip,
            "X-Forwarded-Host": fake_ip,
            "X-Requested-With": fake_ip,
            "X-Client-IP": fake_ip,
            "X-remote-IP": fake_ip,
            "X-remote-addr": fake_ip,
            "X-Real-IP": fake_ip,
            "True-Client-IP": fake_ip,
            "Client-IP": fake_ip,
            "X_FORWARDED_FOR": fake_ip,
            "X_REAL_IP": fake_ip,
            "User-Agent": generate_user_agent()
        }, **kwargs.get("headers", {})}
        try:
            async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=10),
                                             connector=aiohttp.TCPConnector(ssl=False), trust_env=True) as client:
                async with client.request(method=kwargs.get("method", "GET"), url=kwargs["url"],
                                          params=kwargs.get("params", {}),
                                          data=kwargs.get("data", {}), headers=headers,
                                          proxy=kwargs.get("proxy", ""),
                                          proxy_auth=proxy_auth,
                                          timeout=timeout) as rs:
                    if rs.status == 200:
                        return await rs.read()
                    # `rs.text` is a coroutine method: await it so the log
                    # shows the body (truncated) instead of a method repr.
                    body = await rs.text(errors="replace")
                    logger.info(f"pub_req {kwargs} {rs.status} {body[:200]}")
        except Exception as e:
            logger.info(f"pub_req {kwargs} {e}")

        attempt += 1
        if attempt >= 2:
            return None
        kwargs["retry"] = attempt
        # Non-blocking pause; time.sleep() here would stall the event loop
        # and every other request served by this worker.
        await asyncio.sleep(randint(1, 2))
156 | 
157 | 
# Proxy provider
async def get_proxy(**kwargs):
    """Return proxy records from the local cache file or from the provider API.

    Keyword Args:
        turn: When falsy (default), an unexpired entry cached in
            ``proxy.json`` is reused. When truthy, the cache is skipped and
            the provider is always asked.
        retry: Number of failed attempts already made (default 0).

    Returns:
        The list found under ``"data"`` in the provider's answer (also
        written to ``proxy.json``), the cached list, or ``None`` on failure.
    """
    if not kwargs.get("turn", 0):
        try:
            if not os.path.exists('proxy.json'):
                with open('proxy.json', 'w') as f:
                    dump([], f)
            with open('proxy.json', 'r') as f:
                cached = json.load(f)
            if cached:
                expire_time = int(time.mktime(time.strptime(cached[0]["expire_time"], "%Y-%m-%d %H:%M:%S")))
                if int(time.time()) < expire_time:
                    return cached
        except (OSError, ValueError, KeyError, IndexError, TypeError) as e:
            # A damaged or unexpected cache file must not abort the lookup;
            # fall through and ask the provider.
            logger.info(f'get_proxy cache ignored: {e}')
    # # Fanqie proxy
    # url = 'http://x.fanqieip.com/gip'
    # params = {"getType": "3","qty": "1","port": "1","time": "1","city": "0","format": "2","ss": "1","dt": "1","css": ""}
    # Zhima proxy
    url = 'http://webapi.http.zhimacangku.com/getip'
    params = {"num": "1", "type": "2", "pro": "0", "city": "0", "yys": "0", "port": "1", "time": "1", "ts": "1",
              "ys": "0", "cs": "0", "lb": "1", "sb": "0", "pb": "4", "mr": "1", "regions": ""}
    attempt = kwargs.get("retry", 0)
    while True:
        try:
            result = await pub_req(url=url, params=params)
            # Check before decoding: pub_req returns None on failure, and
            # decoding None used to raise and trigger a pointless retry.
            if not result:
                return None
            logger.info(result.decode())
            payload = json.loads(result)
            if payload.get("data", ""):
                with open('proxy.json', 'w') as f:
                    json.dump(payload["data"], f)
                return payload["data"]
        except Exception as e:
            logger.info(e)
        attempt += 1
        if attempt >= 2:
            return None
        # Non-blocking pause so the event loop keeps serving other requests.
        await asyncio.sleep(randint(0, 1))
207 | 
208 | 
# Outbound IP lookup
async def query_ip(**kwargs):
    """Return the caller address reported by httpbin.org.

    Keyword Args:
        proxy, proxy_user, proxy_pass: Optional proxy settings forwarded to
            ``pub_req``.
        retry: Number of failed attempts already made (default 0).

    Returns:
        The first address of the ``origin`` field, or ``None`` when the
        request fails or no address is reported.
    """
    url = 'http://httpbin.org/get?show_env=1'
    meta = {
        "url": url,
        "proxy": kwargs.get("proxy", ""),
        "proxy_user": kwargs.get("proxy_user", ""),
        "proxy_pass": kwargs.get("proxy_pass", ""),
    }
    attempt = kwargs.get("retry", 0)
    while True:
        try:
            result = await pub_req(**meta)
            if not result:
                return None
            origin = json.loads(result)["origin"]
            # `origin` can list several hops separated by commas; splitting on
            # whitespace left a trailing comma on the first address.
            return origin.split(",")[0].strip() or None
        except Exception as e:
            logger.info(f'query_ip {e}')
        attempt += 1
        if attempt >= 2:
            return None
        # Non-blocking pause so the event loop keeps serving other requests.
        await asyncio.sleep(randint(1, 2))
234 | 
235 | 
# Lookup orchestration
async def query(**kwargs):
    """Try each data source in turn and wrap the first non-empty answer.

    The sources are asked in the order qcc, tyc, aqc, gsxt. When all of them
    come back empty, the lookup is repeated with a proxy: the first repeat
    uses ``get_proxy()``, the second one ``get_proxy(turn=1)``. After that a
    "Fail" envelope is returned.

    Returns:
        ``{"code": 200, "msg": "OK", "result": ...}`` on success, otherwise
        ``{"code": 200, "msg": "Fail", "result": None}``.
    """
    found = await qcc(**kwargs)
    if not found:
        found = await tyc(**kwargs)
    if not found:
        found = await aqc(**kwargs)
    if not found:
        found = await gsxt(**kwargs)
    if found:
        return {"code": 200, "msg": "OK", "result": found}

    attempt = kwargs.get("retry", 0) + 1
    kwargs["retry"] = attempt
    if attempt > 2:
        return {"code": 200, "msg": "Fail", "result": None}

    if attempt == 1:
        # First repeat: a cached proxy is acceptable.
        pool = await get_proxy()
    else:
        # Second repeat: ask for a different proxy.
        pool = await get_proxy(**{"turn": 1})
    address = f'http://{pool[0]["ip"]}:{pool[0]["port"]}' if pool else ""
    kwargs = kwargs | {"proxy": address}
    return await query(**kwargs)
267 | 
268 | 
# Tianyancha search
async def tyc(**kwargs):
    """Search m.tianyancha.com for ``key`` and fetch the details of every hit.

    Keyword Args:
        key: Search keyword.
        proxy, proxy_user, proxy_pass: Optional proxy settings.
        retry: Number of failed attempts already made (default 0).

    Returns:
        A list with the non-empty results of ``tyc_detail``, or ``None`` when
        the request fails, nothing is found, or the retry limit is reached.
    """
    try:
        proxy_settings = {
            "proxy": kwargs.get("proxy", ""),
            "proxy_user": kwargs.get("proxy_user", ""),
            # Fixed: the password used to be read from the "proxy_user" key.
            "proxy_pass": kwargs.get("proxy_pass", ""),
        }
        meta = {
            "url": "https://m.tianyancha.com/search",
            "params": {"key": kwargs.get("key", "")},
            "headers": {"Referer": "https://m.tianyancha.com"},
            **proxy_settings,
        }
        result = await pub_req(**meta)
        if not result: return None
        html = result.decode()
        handlers = etree.HTML(html).xpath('//div[@class="search-company-item"]/@onclick')
        if not handlers: return None
        # Handlers are expected to look like jumpToCompany('<id>'); - take the
        # quoted argument. str.strip() removes a *set of characters*, so it
        # only produced the id by accident.
        ids = [x.partition("('")[2].partition("')")[0] for x in handlers]
        ids = [company_id for company_id in ids if company_id]
        if not ids: return None
        # The detail requests go through the same proxy, credentials included.
        tasks = [asyncio.create_task(tyc_detail(id=company_id, **proxy_settings)) for company_id in ids]
        result = await asyncio.gather(*tasks)
        return [x for x in result if x]
    except Exception as e:
        logger.info(f'tyc {e}')
        retry = kwargs.get("retry", 0)
        retry += 1
        if retry >= 2:
            return None
        kwargs["retry"] = retry
        return await tyc(**kwargs)
298 | 
299 | 
# Tianyancha company detail
async def tyc_detail(**kwargs):
    """Fetch one company page from m.tianyancha.com and map it to a record.

    Keyword Args:
        id: Company id used in the page URL; ``None`` is returned without it.
        proxy, proxy_user, proxy_pass: Optional proxy settings.
        retry: Number of failed attempts already made (default 0).

    Returns:
        A dict with the normalized company fields, or ``None`` when the page
        cannot be fetched or parsed within the retry limit.
    """
    _id = kwargs.get("id", "")
    if not _id: return None
    try:
        meta = {
            "url": f'https://m.tianyancha.com/company/{_id}',
            "headers": {
                "Referer": "https://m.tianyancha.com/search",
            },
            "proxy": kwargs.get("proxy", ""),
            "proxy_user": kwargs.get("proxy_user", ""),
            "proxy_pass": kwargs.get("proxy_pass", ""),
        }
        result = await pub_req(**meta)
        if not result: return None
        # Parse the page once and reuse the tree for every lookup.
        page = etree.HTML(result.decode())
        divs = page.xpath('//div[@class="content"]/div[@class="divide-content"]/div')
        info = [x.xpath('div//text()') for x in divs] if divs else ""
        if not info:
            retry = kwargs.get("retry", 0)
            retry += 1
            if retry >= 2:
                return None
            kwargs["retry"] = retry
            return await tyc_detail(**kwargs)
        data = {}
        for x in info:
            # A row needs at least a label and a value; shorter rows used to
            # raise IndexError and discard the whole record.
            if len(x) < 2:
                continue
            if "法定代表人" in x:
                data[x[0]] = x[1] if len(x) == 2 else x[2]
            elif "经营范围" in x:
                data[x[0]] = x[1]
            elif len(x) > 3:
                # Alternating label/value texts; zip drops a dangling last
                # label instead of raising on odd-length rows.
                for label, value in zip(x[::2], x[1::2]):
                    data[label] = value
            else:
                data[x[0]] = x[1]
        names = page.xpath('//meta[@name="tyc-wx-title"]/@content')
        # partition() copes with a missing or repeated separator, where the
        # former two-way unpack of split() raised ValueError.
        license_start, _, license_end = data.get("营业期限", "").partition("至")
        result = {
            "social_credit_code": data.get("统一社会信用代码", ""),
            "name_cn": names[0] if names else "",
            "legal_person": data.get("法定代表人", ""),
            "status": data.get("经营状态", ""),
            "found_date": data.get("成立日期", ""),
            "registered_capital": data.get("注册资本", ""),
            "really_capital": data.get("实缴资本", ""),
            "issue_date": data.get("核准日期", ""),
            "organization_code": data.get("组织机构代码", ""),
            "regist_code": data.get("工商注册号", ""),
            "taxpayer_code": data.get("纳税人识别号", ""),
            "type": data.get("企业类型", ""),
            "license_start_date": license_start.strip(),
            "taxpayer_crop": data.get("纳税人资质", ""),
            "industry_involved": data.get("行业", ""),
            "province": data.get("所属地区", ""),
            "regist_office": data.get("登记机关", ""),
            "staff_size": data.get("人员规模", ""),
            "insured_size": data.get("参保人数", ""),
            "transformer_name": data.get("曾用名", ""),
            "name_en": data.get("英文名称", ""),
            "imp_exp_enterprise_code": data.get("进出口企业代码", ""),
            "address": data.get("注册地址", ""),
            "regist_address": data.get("注册地址", ""),
            "business_scope": data.get("经营范围", ""),
            "email": "",
            "unit_phone": "",
            "fax": "",
            "website": ""
        }
        # Added last so the key order of the record stays as before.
        result["license_end_date"] = license_end.strip()
        return result
    except Exception as e:
        logger.info(f'tyc_detail {e}')
        retry = kwargs.get("retry", 0)
        retry += 1
        if retry >= 2:
            return None
        kwargs["retry"] = retry
        return await tyc_detail(**kwargs)
386 | 
387 | 
# Qichacha search
async def qcc(**kwargs):
    """Search www.qcc.com for ``key`` and fetch the details of every hit.

    The search page embeds its state as JSON in the first script tag; the
    hits are read from ``search.searchRes.Result`` and each one is completed
    by ``qcc_detail``.

    Returns:
        A list with the non-empty detail results, or ``None`` when the
        request fails, nothing is found, or the retry limit is reached.
    """

    def _plain(text):
        # Drop the highlight markup around matched words.
        return text.replace("<em>", "").replace("</em>", "")

    try:
        request_args = {
            "url": "https://www.qcc.com/web/search",
            "params": {"key": kwargs.get("key", "")},
            "headers": {
                "Cookie": "",
                "Referer": f'https://www.qcc.com/web/search?key={kwargs.get("key", "")}'
            },
            "proxy": kwargs.get("proxy", ""),
            "proxy_user": kwargs.get("proxy_user", ""),
            "proxy_pass": kwargs.get("proxy_pass", ""),
        }
        body = await pub_req(**request_args)
        if not body:
            return None
        scripts = etree.HTML(body.decode()).xpath('//script[1]/text()')
        if not scripts:
            return None
        state_text = '{"appState' + scripts[0].split("appState")[1].split(";(function")[0]
        state = json.loads(state_text)
        hits = state["search"]["searchRes"].get("Result", "") if state else ""
        if not hits:
            return None
        summaries = [
            {
                "keyNo": hit.get("KeyNo", ""),
                "legal_person": _plain(hit.get("OperName", "")),
                "email": hit.get("Email", ""),
                "unit_phone": hit.get("ContactNumber", ""),
                "fax": "",
                "address": _plain(hit.get("Address", "")),
                "website": hit.get("GW", ""),
            }
            for hit in hits
        ]
        pending = [
            asyncio.create_task(qcc_detail(**{"data": summary, "proxy": kwargs.get("proxy", "")}))
            for summary in summaries
        ]
        details = await asyncio.gather(*pending)
        return [detail for detail in details if detail]
    except Exception as e:
        logger.info(f'qcc {e}')
        attempts = kwargs.get("retry", 0) + 1
        if attempts >= 2:
            return None
        kwargs["retry"] = attempts
        return await qcc(**kwargs)
436 | 
437 | 
# Qichacha company detail
async def qcc_detail(**kwargs):
    """Fetch one company page from www.qcc.com and merge it with the search hit.

    Keyword Args:
        data: Search-hit summary built by ``qcc``; must contain ``keyNo``.
        proxy, proxy_user, proxy_pass: Optional proxy settings.
        retry: Number of failed attempts already made (default 0).

    Returns:
        The record parsed from the first HTML table of the page, overlaid
        with the summary fields (without ``keyNo``). The unchanged summary is
        returned when the page cannot be fetched, and ``None`` when there is
        no summary or parsing fails within the retry limit.
    """
    # Local import: only this function needs it.
    from io import StringIO

    data = kwargs.get("data", "")
    if not data: return None
    try:
        meta = {
            # "url": f'https://www.qcc.com/firm/{data["keyNo"]}.html',
            "url": f'https://www.qcc.com/cbase/{data["keyNo"]}.html',
            # "url": f'https://m.qcc.com/firm/{data["keyNo"]}.html',
            "headers": {
                "Connection": "close",
                "Cookie": "",
                "Referer": f'https://www.qcc.com/firm/{data["keyNo"]}.html'
            },
            "proxy": kwargs.get("proxy", ""),
            "proxy_user": kwargs.get("proxy_user", ""),
            "proxy_pass": kwargs.get("proxy_pass", ""),
        }
        result = await pub_req(**meta)
        if not result: return data
        # Passing literal HTML to read_html is deprecated in recent pandas;
        # a file-like wrapper works on old and new versions alike.
        tables = pd.read_html(StringIO(result.decode()))
        # Flatten the first table row by row: labels sit on even positions,
        # their values on the following odd ones.
        cells = [cell for row in tables[0].values.tolist() for cell in row]
        info = {}
        for label, value in zip(cells[::2], cells[1::2]):
            # Empty cells arrive as NaN, not str; they used to crash the loop.
            if not isinstance(label, str) or "复制" in label:
                continue
            if isinstance(value, str):
                info[label] = value.replace("复制", "").strip()
            else:
                info[label] = "" if pd.isna(value) else str(value)
        # partition() copes with a missing or repeated separator, where the
        # former two-way unpack of split() raised ValueError.
        license_start, _, license_end = info.get("营业期限", "").partition("至")
        result = {
            "social_credit_code": info.get("统一社会信用代码", ""),
            "name_cn": info.get("企业名称", ""),
            "legal_person": info.get("法定代表人", ""),
            "status": info.get("登记状态", ""),
            "found_date": info.get("成立日期", ""),
            "registered_capital": info.get("注册资本", ""),
            "really_capital": info.get("实缴资本", ""),
            "issue_date": info.get("核准日期", ""),
            "organization_code": info.get("组织机构代码", ""),
            "regist_code": info.get("工商注册号", ""),
            "taxpayer_code": info.get("纳税人识别号", ""),
            "type": info.get("企业类型", ""),
            "license_start_date": license_start.strip(),
            "taxpayer_crop": info.get("纳税人资质", ""),
            "industry_involved": info.get("所属行业", ""),
            "province": info.get("所属地区", ""),
            "regist_office": info.get("登记机关", ""),
            "staff_size": info.get("人员规模", ""),
            "insured_size": info.get("参保人数", ""),
            "transformer_name": info.get("曾用名", ""),
            "name_en": info.get("英文名", "").split("（")[0],
            "imp_exp_enterprise_code": info.get("进出口企业代码", ""),
            # First whitespace-separated token; "" when the address is
            # missing (indexing the empty split used to raise IndexError).
            "regist_address": (info.get("注册地址", "").split() or [""])[0],
            "business_scope": info.get("经营范围", ""),
        }
        # Added last so the key order of the record stays as before.
        result["license_end_date"] = license_end.strip()
        # Summary values win over the page values; keyNo is internal only.
        # The caller's dict is left untouched.
        return result | {k: v for k, v in data.items() if k != "keyNo"}
    except Exception as e:
        # .get(): the handler itself must not raise when keyNo is absent.
        logger.info(f'qcc_detail {e} {data.get("keyNo", "")}')
        retry = kwargs.get("retry", 0)
        retry += 1
        if retry >= 2:
            return None
        kwargs["retry"] = retry
        return await qcc_detail(**kwargs)
568 | 
569 | 
# Aiqicha search
async def aqc(**kwargs):
    """Search aiqicha.baidu.com for ``key`` and fetch the details of every hit.

    The result page embeds its data as JSON (starting at ``{"sid"``) in the
    first script tag; every entry of ``result.resultList`` is completed by
    ``aqc_detail``.

    Returns:
        A list with the non-empty detail results, or ``None`` when the
        request fails, the page has no script text, or the retry limit is
        reached.
    """
    try:
        request_args = {
            "url": "https://aiqicha.baidu.com/s",
            "params": {"q": kwargs.get("key", ""), "t": "0"},
            "headers": {"Cookie": "", "Referer": 'https://aiqicha.baidu.com/'},
            "proxy": kwargs.get("proxy", ""),
            "proxy_user": kwargs.get("proxy_user", ""),
            "proxy_pass": kwargs.get("proxy_pass", ""),
        }
        body = await pub_req(**request_args)
        if not body:
            return None
        scripts = etree.HTML(body.decode()).xpath('//script[1]/text()')
        if not scripts:
            return None
        payload_text = '{"sid"' + scripts[0].split('{"sid"')[1].split(";\n")[0]
        payload = json.loads(payload_text)
        entries = [{"pid": hit["pid"]} for hit in payload["result"]["resultList"]]
        pending = [
            asyncio.create_task(aqc_detail(**{"data": entry, "proxy": kwargs.get("proxy", "")}))
            for entry in entries
        ]
        details = await asyncio.gather(*pending)
        return [detail for detail in details if detail]
    except Exception as e:
        logger.info(f'aqc {e}')
        attempts = kwargs.get("retry", 0) + 1
        if attempts >= 2:
            return None
        kwargs["retry"] = attempts
        return await aqc(**kwargs)
607 | 
608 | 
609 | # 爱企查企业详情
async def aqc_detail(**kwargs):
    """Fetch one company's basic record from Aiqicha and map it to the common schema.

    Keyword Args:
        data: Mapping that carries the company ``pid``. ``None`` is returned
            straight away when it is missing or empty.
        proxy / proxy_user / proxy_pass: Passed through to ``pub_req``.
        retry: Internal attempt counter; the lookup runs at most twice.

    Returns:
        dict | None: The normalised record, or ``None`` when the request
        yields nothing or both attempts fail.
    """
    data = kwargs.get("data", "")
    if not data:
        return None
    try:
        pid = data["pid"]
        detail_page = f'https://aiqicha.baidu.com/company_detail_{pid}'
        meta = {
            "url": "https://aiqicha.baidu.com/detail/basicAllDataAjax",
            "params": {"pid": pid},
            "headers": {
                "Cookie": "",
                "Referer": detail_page,
                "X-Requested-With": "XMLHttpRequest",
                "Zx-Open-Url": detail_page,
            },
            "proxy": kwargs.get("proxy", ""),
            "proxy_user": kwargs.get("proxy_user", ""),
            "proxy_pass": kwargs.get("proxy_pass", ""),
        }
        result = await pub_req(**meta)
        if not result:
            return None
        result = json.loads(result.decode())
        result = (result.get("data") or {}).get("basicData")
        if not result:
            # An empty payload is treated as a transient failure and retried once.
            retry = kwargs.get("retry", 0)
            retry += 1
            if retry >= 2:
                return None
            kwargs["retry"] = retry
            return await aqc_detail(**kwargs)

        # Province is the district prefix up to "省", otherwise up to "市".
        # An unknown district stays empty instead of becoming a bare "市".
        district = result.get("district") or ""
        if "省" in district:
            province = f'{district.split("省")[0]}省'
        elif district:
            province = f'{district.split("市")[0]}市'
        else:
            province = ""

        # Former name may arrive as a list (possibly empty) or as a plain string.
        former_name = result.get("prevEntName", "")
        if isinstance(former_name, list):
            former_name = former_name[0] if former_name else ""

        # Optional sub-object: a missing one must not discard the whole record.
        insurance = result.get("insuranceInfo") or {}

        return {
            "name_cn": result.get("entName", ""),
            "name_en": "",
            "legal_person": result.get("legalPerson", ""),
            "registered_capital": result.get("regCapital", ""),
            "really_capital": result.get("realCapital", ""),
            "found_date": result.get("startDate", ""),
            "issue_date": result.get("annualDate", ""),
            "social_credit_code": result.get("unifiedCode", ""),
            "organization_code": result.get("orgNo", ""),
            "regist_code": result.get("licenseNumber", ""),
            "taxpayer_code": result.get("regNo", ""),
            "imp_exp_enterprise_code": "",
            "industry_involved": result.get("industry", ""),
            "type": result.get("entType", ""),
            "license_start_date": result.get("startDate", ""),
            # openTime is split on "至"; the last part is used as the end date.
            "license_end_date": (result.get("openTime") or "").split("至")[-1].strip(),
            "regist_office": result.get("authority", ""),
            "staff_size": "",
            "insured_size": insurance.get("insuranceNum", ""),
            "province": province,
            "address": result.get("addr", ""),
            "business_scope": result.get("scope", ""),
            "email": result.get("email", ""),
            "unit_phone": result.get("telephone", ""),
            "fax": "",
            "website": result.get("website", ""),
            "regist_address": result.get("regAddr", ""),
            "transformer_name": former_name,
            "status": result.get("openStatus", ""),
        }
    except Exception as e:
        logger.info(f"aqc_detail {e}")
        retry = kwargs.get("retry", 0)
        retry += 1
        if retry >= 2:
            return None
        kwargs["retry"] = retry
        return await aqc_detail(**kwargs)
681 | 
682 | 
683 | # 国家企业信用信息公示系统
async def gsxt(**kwargs):
    """Search the National Enterprise Credit Information Publicity System.

    Posts the keyword to the gsxt app search endpoint, then fetches the detail
    record of every hit concurrently through ``gsxt_detail``.

    Keyword Args:
        key: Search keyword (company name).
        proxy / proxy_user / proxy_pass: Passed to ``pub_req`` and to every
            detail lookup.
        retry: Internal attempt counter; the search runs at most twice.

    Returns:
        list | None: The non-empty detail records, or ``None`` when the search
        returns no data or both attempts fail.
    """
    try:
        meta = {
            "method": "POST",
            "url": "https://app.gsxt.gov.cn/gsxt/corp-query-app-search-1.html",
            "data": {
                "conditions": '{"excep_tab":"0","ill_tab":"0","area":"0","cStatus":"0","xzxk":"0","xzcf":"0","dydj":"0"}',
                "searchword": kwargs.get("key", ""), "sourceType": "W"},
            "headers": {"X-Requested-With": "XMLHttpRequest"},
            "proxy": kwargs.get("proxy", ""),
            "proxy_user": kwargs.get("proxy_user", ""),
            "proxy_pass": kwargs.get("proxy_pass", ""),
        }
        result = await pub_req(**meta)
        if not result:
            return None
        result = json.loads(result)
        if not result.get("data", ""):
            return None

        # Detail requests go through the same proxy settings as the search itself.
        proxy_settings = {name: kwargs.get(name, "") for name in ("proxy", "proxy_user", "proxy_pass")}
        # The whole search hit is handed over (not just its pripid) because
        # gsxt_detail reads further fields from ``data`` for its fallback record.
        # Hits without a pripid cannot be looked up and are skipped.
        tasks = [
            asyncio.create_task(gsxt_detail(data=hit, **proxy_settings))
            for hit in result["data"]["result"]["data"]
            if hit.get("pripid")
        ]
        details = await asyncio.gather(*tasks)
        return [x for x in details if x]
    except Exception as e:
        logger.info(f'gsxt {e}')
        retry = kwargs.get("retry", 0)
        retry += 1
        if retry >= 2:
            return None
        kwargs["retry"] = retry
        return await gsxt(**kwargs)
719 | 
720 | 
721 | # 国家企业信用信息公示系统公司详情信息
async def gsxt_detail(**kwargs):
    """Fetch one company's base record from gsxt and map it to the common schema.

    Keyword Args:
        data: Mapping with the company ``pripid``. Any further keys
            (``entName``, ``uniscId``, ``historyName`` ...) are used as a
            fallback when the detail request returns nothing.
        proxy / proxy_user / proxy_pass: Passed through to ``pub_req``.
        retry: Internal attempt counter; the lookup runs at most twice.

    Returns:
        dict | None: The detail record, the reduced fallback record built from
        ``data`` when no detail is available, or ``None`` when ``data`` is
        missing or both attempts fail.
    """
    data = kwargs.get("data", "")
    if not data:
        # Nothing to look up; do not burn a retry on a guaranteed failure.
        return None

    def _plain(text):
        # Drop the <font> highlight markup found in these fields.
        return (text or "").replace("<font color=red>", "").replace("</font>", "")

    try:
        meta = {
            "url": f'https://app.gsxt.gov.cn/gsxt/corp-query-entprise-info-primaryinfoapp-entbaseInfo-{data["pripid"]}.html',
            "params": {"nodeNum": "310000", "entType": "6150", "sourceType": "W"},
            "headers": {"Referer": "https://servicewechat.com", "content-type": "application/x-www-form-urlencoded",
                        "Accept-Encoding": "gzip, deflate, br"},
            "proxy": kwargs.get("proxy", ""),
            "proxy_user": kwargs.get("proxy_user", ""),
            "proxy_pass": kwargs.get("proxy_pass", ""),
        }
        res = await pub_req(**meta)
        # Reduced record built from whatever the caller supplied in ``data``.
        result = {
            "name_cn": _plain(data.get("entName", "")),
            "status": data.get("corpStatusString", ""),
            "regist_code": data.get("regNo", ""),
            "social_credit_code": data.get("uniscId", ""),
            "legal_person": data.get("legelRep", ""),
            "type": data.get("entTypeString", ""),
            "found_date": data.get("estDate", ""),
            "regist_office": data.get("regOrg", ""),
            "transformer_name": _plain(data.get("historyName", "")),
        }
        if not res:
            return result
        res = json.loads(res.decode())
        detail = res.get("result")
        if detail:
            # Fields absent from the detail payload fall back to the reduced
            # record (or ""), so one missing key no longer loses the company.
            result = {
                "name_cn": detail.get("entName", result["name_cn"]),
                "name_en": "",
                "legal_person": detail.get("name", result["legal_person"]),
                # NOTE(review): capital and nodeNum are read from the top level
                # of the response, as before — confirm against the live API.
                "registered_capital": f'{res.get("regCaption") or ""}{res.get("regCapCurCN") or ""}'.strip(),
                "really_capital": "",
                "found_date": detail.get("estDate", result["found_date"]),
                "issue_date": detail.get("apprDate", ""),
                "social_credit_code": detail.get("uniscId", result["social_credit_code"]),
                "organization_code": "",
                "regist_code": detail.get("regNo", result["regist_code"]),
                "taxpayer_code": "",
                "imp_exp_enterprise_code": "",
                "industry_involved": detail.get("industryPhy", ""),
                "type": detail.get("entType_CN", result["type"]),
                "license_start_date": detail.get("opFrom", ""),
                "license_end_date": detail.get("opTo", ""),
                "regist_office": detail.get("regOrg_CN", result["regist_office"]),
                "staff_size": "",
                "insured_size": "",
                "province": res.get("nodeNum", ""),
                "address": detail.get("dom", ""),
                "business_scope": detail.get("opScope", ""),
                "email": "",
                "unit_phone": "",
                "fax": "",
                "website": "",
                "regist_address": detail.get("dom", ""),
                "transformer_name": result["transformer_name"],
                "status": detail.get("regState_CN", result["status"]),
            }
        return result

    except Exception as e:
        logger.info(f'gsxt_detail {e}')
        retry = kwargs.get("retry", 0)
        retry += 1
        if retry >= 2:
            return None
        kwargs["retry"] = retry
        return await gsxt_detail(**kwargs)
791 | 
792 | 
async def test():
    """Manual smoke check: run a single ``qcc`` lookup through a local proxy and log the outcome."""
    # proxy = await get_proxy()
    local_proxy = 'http://127.0.0.1:1080'
    logger.info(local_proxy)
    lookup = await qcc(key="特变电工湖南工程有限公司", proxy=local_proxy)
    logger.info(lookup)
    # Concurrent variant kept for manual load checks:
    # tasks = [asyncio.create_task(qcc(**{"key": "特变电工湖南工程有限公司", "proxy": proxy})) for x in range(10)]
    # await asyncio.gather(*tasks)
801 | 
802 | 
if __name__ == '__main__':
    # Manual debugging entry point: runs one lookup coroutine and logs its result.
    # To serve the API instead:
    # import uvicorn
    # uvicorn.run(app)

    # proxy = 'http://127.0.0.1:1080'
    proxy = ''

    # Sample search inputs (swap the active one as needed):
    # kwargs = {"key": "上海电气集团股份有限公司", "proxy": ""}
    # kwargs = {"key": "上海宽娱数码科技有限公司", "proxy": ""}
    # kwargs = {"key": "厦门臻旻建筑工程有限公司", "proxy": ""}
    # kwargs = {"key": "广东携众建筑咨询服务有限公司", "proxy": ""}
    # kwargs = {"key": "上海茗昊机械工程有限公司", "proxy": ""}
    # kwargs = {**kwargs, **sample(rs, 1)[0]}
    kwargs = {"key": "哔哩哔哩", "proxy": ""}

    loop = asyncio.get_event_loop()

    # Other calls that can be run in place of the active one below:
    # rs = loop.run_until_complete(test())
    # rs = loop.run_until_complete(get_proxy())
    # rs = loop.run_until_complete(get_proxy(**{"turn": 1}))
    # rs = loop.run_until_complete(query_ip(**kwargs))
    # rs = loop.run_until_complete(query_ip(**{"proxy": "http://182.111.108.203:45113"}))
    # rs = loop.run_until_complete(tyc(**kwargs))
    # rs = loop.run_until_complete(qcc(**kwargs))
    # rs = loop.run_until_complete(
    #     qcc_detail(**{"data": {"keyNo": "hbdc8d27a2a556cfcac5001e38f41061"}}))
    # rs = loop.run_until_complete(
    #     qcc_detail(**{"data": {"keyNo": "963f4179841540334d3a16db3fc3567d"}}))
    # rs = loop.run_until_complete(
    #     qcc_detail(**{"url": "https://www.qcc.com/firm/963f4179841540334d3a16db3fc3567d.html"}))
    # rs = loop.run_until_complete(aqc(**kwargs))
    # rs = loop.run_until_complete(aqc_detail(**{"data": {"pid": "43880125442188"}}))
    # rs = loop.run_until_complete(gsxt(**kwargs))
    # pripid = "D1FDF711DFE03EE312CC2ACD3CE218AB448EC78EC78E61ABE228E2ABE2ABE2ABEEABE2ABDF960DC782CB82C7647C-1618992356543"
    # pripid = 'AF2B89C7A13640356C1A541B4234667D3A58B958B9581F7D9C7D9C7D9C7D9C7D1FF213F2F9185FBEDC3DDC3D5F18-1629364295083'
    # rs = loop.run_until_complete(gsxt_detail(**{"data": {"pripid": pripid}}))

    rs = loop.run_until_complete(tyc_detail(id="3149889182"))
    logger.info(rs)

    # Error seen during manual runs:
    # Tunnel connection failed: 401 Authorized failed
839 | 


--------------------------------------------------------------------------------
/看准-企业工商信息查询.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | # Author: Litre WU
  3 | # E-mail: litre-wu@tutanota.com
  4 | # Software: PyCharm
  5 | # File: 看准-企业工商信息查询.py
  6 | # Time: 1月 06, 2022
  7 | import asyncio
  8 | from aiohttp import ClientSession, ClientTimeout, TCPConnector
  9 | from user_agent import generate_user_agent
 10 | from random import randint
 11 | from time import sleep
 12 | from sys import platform
 13 | from json import loads
 14 | from bs4 import BeautifulSoup
 15 | import pandas as pd
 16 | from fastapi import FastAPI
 17 | from pydantic import BaseModel, Field
 18 | import socket
 19 | from boltons.cacheutils import LRU
 20 | from hashlib import md5
 21 | 
 22 | if platform == "win32":
 23 |     asyncio.set_event_loop(asyncio.ProactorEventLoop())
 24 |     asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
 25 | 
 26 | host = socket.gethostbyname(socket.gethostname())
 27 | 
 28 | lru_cache = LRU(max_size=100)
 29 | 
 30 | tags_metadata = [
 31 |     {
 32 |         "name": "看准-企业工商信息查询接口",
 33 |         "description": "看准-企业工商信息查询",
 34 |         "externalDocs": {
 35 |             "description": "More",
 36 |             "url": f"http://{host}/docs",
 37 |         },
 38 |     },
 39 | ]
 40 | 
 41 | contact = {
 42 |     "name": "Litre",
 43 |     "url": "http://121.37.209.113",
 44 |     "email": "litre-wu@tutanota.com",
 45 | }
 46 | 
 47 | app = FastAPI(openapi_url="/api/v1/api.json", title="看准-企业工商信息查询接口", contact=contact, openapi_tags=tags_metadata)
 48 | 
 49 | 
 50 | class SearchItem(BaseModel):
 51 |     query: str = Field(..., example='哔哩哔哩')
 52 |     cityCode: str = Field(..., example=0)
 53 |     industryCodes: str = Field(..., example='')
 54 |     pageNum: str = Field(..., example=1)
 55 |     limit: str = Field(..., example=15)
 56 | 
 57 | 
 58 | # 查询接口
 59 | @app.post("/search", tags=["看准-工商信息查询"])
 60 | async def search(data: SearchItem):
 61 |     kwargs = data.dict()
 62 |     key = md5(str(kwargs).encode()).hexdigest()
 63 |     if lru_cache.get(key): return lru_cache[key]
 64 |     result = await query(**kwargs)
 65 |     if result: lru_cache[key] = result
 66 |     return result
 67 | 
 68 | 
 69 | class InfoItem(BaseModel):
 70 |     encCompanyId: str = Field(..., example='0XN_2dW7Fw~~')
 71 | 
 72 | 
 73 | # 工商信息接口
 74 | @app.post("/compInfo", tags=["看准-工商信息查询"])
 75 | async def info(data: InfoItem):
 76 |     kwargs = data.dict()
 77 |     key = md5(str(kwargs).encode()).hexdigest()
 78 |     if lru_cache.get(key): return lru_cache[key]
 79 |     result = await compInfo(**kwargs)
 80 |     if result: lru_cache[key] = result
 81 |     return result
 82 | 
 83 | 
 84 | # 公共请求函数
 85 | async def pub_req(**kwargs):
 86 |     method = kwargs.get("method", "GET")
 87 |     url = kwargs.get("url", "")
 88 |     params = kwargs.get("params", {})
 89 |     data = kwargs.get("data", {})
 90 |     headers = {**{"User-Agent": generate_user_agent()}, **kwargs.get("headers", {})}
 91 |     proxy = kwargs.get("proxy", "")
 92 |     timeout = kwargs.get("timeout", 10)
 93 |     try:
 94 |         async with asyncio.Semaphore(20):
 95 |             async with ClientSession(timeout=ClientTimeout(total=3),
 96 |                                      connector=TCPConnector(ssl=False),
 97 |                                      trust_env=True) as client:
 98 |                 async with client.request(method=method, url=url, params=params, data=data, headers=headers,
 99 |                                           proxy=proxy,
100 |                                           timeout=timeout) as rs:
101 |                     if rs.status == 200 or 201:
102 |                         content = await rs.read()
103 |                         return content
104 |                     else:
105 |                         sleep(randint(1, 2))
106 |                         retry = kwargs.get("retry", 0)
107 |                         retry += 1
108 |                         if retry >= 2:
109 |                             return None
110 |                         kwargs["retry"] = retry
111 |                         return await pub_req(**kwargs)
112 |     except Exception as e:
113 |         print(e)
114 |         sleep(randint(1, 2))
115 |         retry = kwargs.get("retry", 0)
116 |         retry += 1
117 |         if retry >= 2:
118 |             return None
119 |         kwargs["retry"] = retry
120 |         return await pub_req(**kwargs)
121 | 
122 | 
123 | # 查询
async def query(**kwargs):
    """Call the kanzhun company search endpoint and return the decoded JSON.

    Recognised keyword arguments are ``query``, ``cityCode``,
    ``industryCodes``, ``pageNum`` and ``limit``; each falls back to the
    default listed below when absent. Returns ``None`` when the request
    yields no body.
    """
    fallbacks = {
        "query": "哔哩哔哩",
        "cityCode": 0,
        "industryCodes": "",
        "pageNum": 1,
        "limit": 15,
    }
    search_params = {name: kwargs.get(name, default) for name, default in fallbacks.items()}
    raw = await pub_req(
        url="https://www.kanzhun.com/search/company_v2.json",
        params=search_params,
        headers={"Accept-Encoding": "gzip, deflate, br"},
    )
    # print(raw.decode())
    return loads(raw) if raw else None
142 | 
143 | 
144 | # 工商信息
async def compInfo(**kwargs):
    """Scrape the registration table of a kanzhun firm page into a dict.

    Keyword Args:
        encCompanyId: Company id that is placed into the firm-info page URL.

    Returns:
        dict | None: Label -> value pairs read from the first table inside the
        ``kz-company-desc`` block (empty values become ``None``), or ``None``
        when the page cannot be fetched or holds no such table.
    """
    from io import StringIO  # local import: wraps the HTML snippet for pandas

    meta = {
        "url": f'https://www.kanzhun.com/firm/info/{kwargs.get("encCompanyId", "")}.html',
        "headers": {
            "Accept-Encoding": "gzip, deflate, br"
        }
    }
    res = await pub_req(**meta)
    if not res:
        return None
    soup = BeautifulSoup(res.decode(), 'html.parser')
    div = soup.find_all("div", class_="kz-company-desc")
    if not div or div[0].table is None:
        return None
    # pandas expects a file-like object; passing literal HTML is deprecated.
    tables = pd.read_html(StringIO(str(div[0].table)))
    info = {}
    for row in tables[0].values.tolist():
        # Cells alternate label, value, label, value ...; zip drops a trailing
        # label that has no value cell instead of raising IndexError.
        for label, value in zip(row[0::2], row[1::2]):
            if pd.isna(label):
                continue
            label = str(label)
            # Labels that are empty or consist only of dashes are placeholders.
            if not label.strip("-"):
                continue
            # NaN is not JSON-serialisable, so an empty cell is reported as None.
            info[label] = None if pd.isna(value) else value
    return info
165 | 
166 | 
async def main():
    """Manual check: run ``query`` with its default arguments and print the answer."""
    # Alternative: fetch a single company record instead.
    # rs = await compInfo(**{"encCompanyId": "0XN_2dW7Fw~~"})
    print(await query())
171 | 
172 | 
if __name__ == '__main__':
    # asyncio.run creates and closes its own event loop; calling
    # asyncio.get_event_loop() with no running loop is deprecated.
    asyncio.run(main())
175 | 


--------------------------------------------------------------------------------