├── CMakeLists.txt ├── README.md ├── fast_requests.py ├── fast_requetst_cpp.pyd ├── libcurl.dll ├── main.cpp └── zlib1.dll /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required (VERSION 3.5) 2 | project ("fast_requetst") 3 | 4 | SET(CMAKE_BUILD_TYPE "Release") 5 | 6 | find_package(pybind11 CONFIG REQUIRED) 7 | find_package(cpr CONFIG REQUIRED) 8 | 9 | pybind11_add_module(fast_requetst_cpp main.cpp) 10 | target_link_libraries(fast_requetst_cpp PRIVATE cpr pybind11::embed pybind11::module pybind11::pybind11) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [English](#English) 2 | 3 | ## 介绍 4 | 5 | 这可能是最快的http请求库,暂支持get方法 6 | 7 | ## 要求 8 | 9 | windows平台和python3.7.2 10 | 11 | ## 使用 12 | 13 | ### pip 14 | 15 | `pip install requests-cpp` 16 | 17 | ### 手动导入项目 18 | 19 | 下载`fast_requetst_cpp.pyd`、`fast_requests.py`、`libcurl.dll`、`zlib1.dll`四个文件,在项目中导入`fast_requests.py` 20 | 21 | ### 部分问题 22 | 23 | 如果text为空的话请设置verifySsl=False 24 | 25 | ## api 26 | 27 | ```python 28 | #timeOut 单位为毫秒 29 | get(urls, headers={}, nThread=5, isDebug=False, timeOut=3000, verifySsl=True) 30 | 31 | sessionGet(urls, headers={}, nThread=5, isDebug=False, timeOut=3000, verifySsl=True) 32 | 33 | #sessionGet和get的区别:sessionGet为每个线程持有一个session对象相当于requests的session,建议使用sessionGet速度更快 34 | urls 请求的网址列表 35 | headers 请求头 36 | nThread 线程数(看自己设备配置,一般设20就差不多了) 37 | isDebug 开启后会打印每个线程每个请求信息 38 | timeOut 超时 39 | verifySsl ssl验证 40 | 返回 Response 对象列表 41 | 42 | Response 对象 43 | 44 | Response.text 正文text文本 45 | Response.status_code 状态码 46 | Response.header 47 | Response.content() 二进制数据 48 | Response.Url url 49 | Response.elapsed 50 | Response.cookies 51 | Response.error 暂且用不了 52 | ``` 53 | 54 | 示例代码: 55 | 56 | ```python 57 | import fast_requests 58 | urls = [] 59 | headers = { 60 | "User-Agent":"Mozilla/5.0 (Windows NT 
10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36" 61 | } 62 | for i in range(1, 200): 63 | urls.append("https://baidu.com") 64 | for i in fast_requests.get(urls, headers, 20): 65 | print(i.text) 66 | 67 | for i in fast_requests.sessionGet(urls, headers, 20): 68 | print(i.text) 69 | ``` 70 | 71 | ## 原理 72 | 73 | c++ : cpr 74 | python : pybind11 75 | 76 | ------ 77 | 78 | # English 79 | 80 | ## Introduction 81 | 82 | This may be the fastest HTTP request library; currently only the GET method is supported 83 | 84 | ## Requirements 85 | 86 | Windows and Python 3.7.2 87 | 88 | ## Usage 89 | 90 | ### pip 91 | 92 | `pip install requests-cpp` 93 | 94 | ### Import project manually 95 | 96 | Download the four files `fast_requetst_cpp.pyd`, `fast_requests.py`, `libcurl.dll` and `zlib1.dll`, then import `fast_requests.py` in your project 97 | 98 | ### Known issues 99 | 100 | If `text` comes back empty, set `verifySsl=False` 101 | 102 | ## API 103 | 104 | ```python 105 | # timeOut is in milliseconds 106 | get(urls, headers={}, nThread=5, isDebug=False, timeOut=3000, verifySsl=True) 107 | 108 | sessionGet(urls, headers={}, nThread=5, isDebug=False, timeOut=3000, verifySsl=True) 109 | 110 | # Difference between sessionGet and get: sessionGet keeps one session object per thread (like a requests Session); sessionGet is recommended because it is faster 111 | urls list of URLs to request 112 | headers request headers 113 | nThread number of threads (depends on your machine; 20 is usually enough) 114 | isDebug when enabled, prints information about every request on every thread 115 | timeOut timeout 116 | verifySsl SSL verification 117 | Returns a list of Response objects 118 | 119 | Response object 120 | 121 | Response.text response body as text 122 | Response.status_code status code 123 | Response.header 124 | Response.content() binary data 125 | Response.Url url 126 | Response.elapsed 127 | Response.cookies 128 | Response.error is temporarily 
import fast_requetst_cpp


def _run(mode, urls, headers, nThread, isDebug, timeOut, verifySsl):
    """Forward one batch of requests to ``fast_requetst_cpp.run``.

    ``headers=None`` is replaced by a fresh empty dict so that no dict object
    is shared between calls through a default argument.
    """
    if headers is None:
        headers = {}
    return fast_requetst_cpp.run(mode, urls, headers, nThread, isDebug, timeOut, verifySsl)


def get(urls, headers=None, nThread=5, isDebug=False, timeOut=3000, verifySsl=True):
    """Request every URL in ``urls`` using the native module's "get" mode.

    Args:
        urls: list of URL strings to request.
        headers: dict of request headers; ``None`` (the default) means no
            extra headers.
        nThread: number of worker threads to use.
        isDebug: when true, the native module prints a line per request.
        timeOut: timeout in milliseconds.
        verifySsl: whether to verify SSL certificates.

    Returns:
        The list of response objects returned by ``fast_requetst_cpp.run``.
    """
    return _run("get", urls, headers, nThread, isDebug, timeOut, verifySsl)


def sessionGet(urls, headers=None, nThread=5, isDebug=False, timeOut=3000, verifySsl=True):
    """Request every URL in ``urls`` using the native module's "session" mode.

    Takes the same arguments and returns the same kind of list as ``get``;
    the only difference is the mode string passed to the native module.
    """
    return _run("session", urls, headers, nThread, isDebug, timeOut, verifySsl)
#include 8 | #include 9 | 10 | using namespace std; 11 | using namespace cpr; 12 | namespace py = pybind11; 13 | 14 | mutex _mutex; 15 | 16 | void _get(string type, vector urls, bool isDebug, vector &responseList, Header headers, int timeOut, 17 | bool verifySsl) { 18 | vector res; 19 | auto threadId = this_thread::get_id(); 20 | int n = 0; 21 | Session session; 22 | if (type == "session") { 23 | session.SetHeader(headers); 24 | session.SetVerifySsl(verifySsl); 25 | session.SetTimeout(timeOut); 26 | } 27 | for (const auto &url:urls) { 28 | Response r; 29 | if (type == "get") { 30 | r = Get(Url{url}, VerifySsl() = verifySsl, headers, Timeout{timeOut}); 31 | } else if (type == "session") { 32 | session.SetUrl(url); 33 | r = session.Get(); 34 | } 35 | if (isDebug) { 36 | cout << "DEBUG threadId = " << threadId << " status_code = " << r.status_code << " n = " << ++n << endl; 37 | } 38 | res.push_back(move(r)); 39 | } 40 | lock_guard lock(_mutex); 41 | for (auto &i : res) { 42 | responseList.push_back(move(i)); 43 | } 44 | } 45 | 46 | 47 | vector run(string type, vector urls, py::dict head_dict, int nthread = 5, bool isDebug = false, int timeOut = 3000, 48 | bool verifySsl = true) { 49 | cpr::Header headers; 50 | vector responseList; 51 | for (auto item : head_dict) { 52 | headers.insert({string(py::str(item.first)), string(py::str(item.second))}); 53 | } 54 | 55 | 56 | if (urls.empty()) 57 | return responseList; 58 | if (urls.size() <= nthread) { 59 | nthread = urls.size(); 60 | } 61 | 62 | // 分配任务数据 63 | vector > ThreadUrls(nthread); 64 | while (!urls.empty()) { 65 | for (int i = 0; i < nthread; i++) { 66 | if (urls.empty()) { 67 | break; 68 | } 69 | ThreadUrls[i].push_back(urls.back()); 70 | urls.pop_back(); 71 | } 72 | } 73 | 74 | //创建线程池 75 | vector threadPoll; 76 | for (int i = 0; i < nthread; i++) { 77 | thread t(_get, type, ThreadUrls[i], isDebug, ref(responseList), headers, timeOut, verifySsl); 78 | threadPoll.push_back(move(t)); 79 | } 80 | 81 | //运行线程 82 | 
for (int i = 0; i < nthread; i++) { 83 | threadPoll[i].join(); 84 | } 85 | return responseList; 86 | } 87 | 88 | 89 | PYBIND11_MODULE(fast_requetst_cpp, m) { 90 | m.doc() = "pybind11 fast_requetst module"; 91 | m.def("run", run); 92 | py::class_ response(m, "response"); 93 | response.def(py::init<>()) 94 | .def_readwrite("status_code", &Response::status_code) 95 | .def_readwrite("text", &Response::text) 96 | .def_readwrite("header", &Response::header) 97 | .def_readwrite("Url", &Response::url) 98 | .def_readwrite("elapsed", &Response::elapsed) 99 | .def_readwrite("cookies", &Response::cookies) 100 | .def_readwrite("error", &Response::error) 101 | .def("content", 102 | [](const Response& re) { 103 | return py::bytes(re.text); 104 | } 105 | ); 106 | } -------------------------------------------------------------------------------- /zlib1.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daimiaopeng/fast_requests/HEAD/zlib1.dll --------------------------------------------------------------------------------