├── .gitignore
├── README.md
├── decrypt.py
├── demo.html
├── demo
    ├── __init__.py
    └── get_cookie.py
└── demo2
    ├── __init__.py
    ├── get_cookie.py
    └── requirements.txt


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Created by .ignore support plugin (hsz.mobi)
  2 | ### Python template
  3 | # Byte-compiled / optimized / DLL files
  4 | __pycache__/
  5 | *.py[cod]
  6 | *$py.class
  7 | 
  8 | # C extensions
  9 | *.so
 10 | 
 11 | # Distribution / packaging
 12 | .Python
 13 | build/
 14 | develop-eggs/
 15 | dist/
 16 | downloads/
 17 | eggs/
 18 | .eggs/
 19 | lib/
 20 | lib64/
 21 | parts/
 22 | sdist/
 23 | var/
 24 | wheels/
 25 | pip-wheel-metadata/
 26 | share/python-wheels/
 27 | *.egg-info/
 28 | .installed.cfg
 29 | *.egg
 30 | MANIFEST
 31 | 
 32 | # PyInstaller
 33 | #  Usually these files are written by a python script from a template
 34 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 35 | *.manifest
 36 | *.spec
 37 | 
 38 | # Installer logs
 39 | pip-log.txt
 40 | pip-delete-this-directory.txt
 41 | 
 42 | # Unit test / coverage reports
 43 | htmlcov/
 44 | .tox/
 45 | .nox/
 46 | .coverage
 47 | .coverage.*
 48 | .cache
 49 | nosetests.xml
 50 | coverage.xml
 51 | *.cover
 52 | .hypothesis/
 53 | .pytest_cache/
 54 | 
 55 | # Translations
 56 | *.mo
 57 | *.pot
 58 | 
 59 | # Django stuff:
 60 | *.log
 61 | local_settings.py
 62 | db.sqlite3
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | target/
 76 | 
 77 | # Jupyter Notebook
 78 | .ipynb_checkpoints
 79 | 
 80 | # IPython
 81 | profile_default/
 82 | ipython_config.py
 83 | 
 84 | # pyenv
 85 | .python-version
 86 | 
 87 | # pipenv
 88 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 89 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 90 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 91 | #   install all needed dependencies.
 92 | #Pipfile.lock
 93 | 
 94 | # celery beat schedule file
 95 | celerybeat-schedule
 96 | 
 97 | # SageMath parsed files
 98 | *.sage.py
 99 | 
100 | # Environments
101 | .env
102 | .venv
103 | env/
104 | venv/
105 | ENV/
106 | env.bak/
107 | venv.bak/
108 | 
109 | # Spyder project settings
110 | .spyderproject
111 | .spyproject
112 | 
113 | # Rope project settings
114 | .ropeproject
115 | 
116 | # mkdocs documentation
117 | /site
118 | 
119 | # mypy
120 | .mypy_cache/
121 | .dmypy.json
122 | dmypy.json
123 | 
124 | # Pyre type checker
125 | .pyre/
126 | 
127 | 将删除 .idea/
128 | 将删除 demo/__init__.py
129 | .idea
130 | venv
131 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # 文书网cookie获取两种方式
  2 | 
  3 | > 本项目只做技术探讨，请勿用于违法用途。
  4 | 
  5 | ## 第一种获取方式
  6 | 
  7 | > 文书网cookie获取 2020-04-16（通过splash获取cookie）
  8 | 
  9 | 获取cookie的demo请见[demo](./demo/get_cookie.py)
 10 | 
 11 | 
 12 | ⚠️如果获取cookie的地址是 `https://wenshu.court.gov.cn/` 这是https，那么是不会返回 `80S` 和 `80T` 这两个cookie的，返回的是 `443S` 和 `443T`
 13 | 
 14 | ⚠️获取cookie的地址是 `http://wenshu.court.gov.cn/` 这是http的，那么才会返回 `80S` 和 `80T` 这两个cookie的
 15 | 
 16 | 
 17 | 获取的 cookie 是通过 http的链接获取，后面的爬取也用http
 18 | 
 19 | 获取 cookie 是通过 https 获取的，后面的爬取也用https 即可
 20 | 
 21 | 
 22 | ⚠️注意：请替换为 `https` ， `http` 已阵亡！！！！！！！！！！！！！！！！！！
 23 | 
 24 | 
 25 | ### 获取教程
 26 | 
 27 | 一. 安装splash
 28 |   
 29 |   推荐docker启动一个splash容器
 30 |   ```
 31 |   docker run -it -p 8050:8050 scrapinghub/splash
 32 |   ```
 33 |   
 34 |   [splash安装教程📖](https://splash.readthedocs.io/en/stable/install.html#linux-docker)
 35 | 
 36 | 二. 通过splash获取这三个cookie，代码如下
 37 | 
 38 | [demo](./demo/get_cookie.py)
 39 | 
 40 | 
 41 | ## 第二种获取方式
 42 | 
 43 | > 通过 PyQtWebEngine 获取cookie
 44 | 
 45 | 示例代码如  [demo2](./demo2/get_cookie.py)
 46 | 
 47 | 
 48 | 
 49 | # 反爬应对措施，2019-10-24 再次更新
 50 | 
 51 | > 返回响应代码 202 的解决方式(这个更新了，需要重新逆向这个js，最近没太多时间)
 52 | 
 53 | 1. 这次如果按照以前的请求方式，会返回一个html页面
 54 | 
 55 | 
 56 | 这个页面如 [demo.html](https://github.com/nciefeiniu/wenshu/blob/master/demo.html)
 57 | 
 58 | 
 59 | ```html
 60 |    <html>
 61 | <head>
 62 | </head>
 63 | <body>
 64 | <noscript>
 65 | <h1><strong>请开启JavaScript并刷新该页.</strong></h1>
 66 | </noscript>
 67 | <script type="text/javascript">
 68 | eval(function(p,a,c,k,e,r){e=function(c){return c.toString(a)};if(!''.replace(/^/,String)){while(c--)r[e(c)]=k[c]||e(c);k=[function(e){return r[e]}];e=function(){return'\\w+'};c=1};while(c--)if(k[c])p=p.replace(new RegExp('\\b'+e(c)+'\\b','g'),k[c]);return p}('0 1="2";0 3="4";0 5="6";0 7="8";0 9="a";',11,11,'var|dynamicurl|/WZWSREL3dlYnNpdGUvcGFyc2UvcmVzdC5xNHc=|wzwsquestion|*5TLni-b([!,-t$B|wzwsfactor|3740|wzwsmethod|post|wzwsparams|__RequestVerificationToken=IcSXIWWM6HSc4Mp6wXAgWxLd&ciphertext=1001000+1101011+1010100+110001+1011000+1000111+110011+1001011+1100100+1110010+1111010+1101000+1100010+1011000+1101000+1010101+1001111+1000100+1110010+1001000+1010000+1011000+1010101+1110111+110010+110000+110001+111001+110001+110010+110010+110100+1001001+1110111+1111010+1001011+1110011+1110110+1111000+1000101+1101011+1000101+1001000+1010001+1001010+1111000+1110110+1100110+1100111+1100101+1110010+1110111+1001000+1000001+111101+111101&pageId=y83l99c8jdhgfb1y9ja4hh7hkfi1l7ci&queryCondition=%5B%7B%22key%22%3A+%22s15%22%2C+%22value%22%3A+%229727%22%7D%5D&pageSize=5&cfg=com.lawyee.judge.dc.parse.dto.SearchDataDsoDTO%40queryDoc&sortFields=s50%3Adesc&pageNum=1'.split('|'),0,{}));
 69 | 
 70 | var encode_version = 'sojson.v5', jezoh = '__0x3fb5e',  __0x3fb5e=['dcK9wotew5nCu2wvw6nCmsOvQcOONsOk','K8Kow4fDhzDDqwdh','UAATJSU=','wr8gw5HCqWw=','G8KzKhLDkA==','wrLDisOUw4HDiTTCnsKnwqHCg8O2w7XClg==','LmrDog4=','e8Ora13Dow==','wodfacKQw5o=','w74Sw5FreA==','wr94w6LDhMOgw4E=','wpkHw53DgsKKwrHDhcKbQ8Kpwp8=','dWPDons=','w7kbw7vDgMKb','w6DDkFFwwp/Cq3jCjUXDsW8=','TBIbBAfDtw==','wok5w7/ChDZV','wq3CvlzCtw==','wrHDsgzClQ==','IcORUmfDlcOPDsOSwr06fMKgBMKcTQ==','CgjDpSkw','w5oWw5vDhMKk','CcK4wpLDlEnCjnXClg==','w7zDhsKwTMOW','w7jDpFXCvcKm','wrTDlsOUw6rDtA==','w4bDn8KcXsOQVVHDkw==','bMOAwr3CsVzDksKTcAc=','wodAb8KKw4HCrDBoaA==','wrDDlMOUw5LDiQ==','5Lm26ICj5Yu/6ZmGw5zDgxHCnMOywpZDM8KD','VQPChSVsbsOvWMODRMOlwqBAWMKz','U8K8TsOnHsKOWMOpb11CwpjDkcOJZTTChAbCixvDtcO0wplFwoZdwrswWcOiwq1sJsOnw50VHhfDgwXDoMKmDBTClsOkJ1RBKkc3YzQYw4zDuEUgY0xEX8KXwrU=','wpZYQsKjw7Y=','wqLDiMOdw4nDiQ==','wpLCqMO8wpPCiQ==','w54Aw5bDqsKD','K23DsRErcg==','eETDt1Nj','w7XDhsOhwpfCrg==','LBMPdFk=','woXDmQ3Cu8Kl','eGrDrmxAdA==','bcKBwpPDmz3Clw==','KmzDghcL','w6vDu8K7MQY=','wr4+w6fCph0=','WsKSWMO7EQ==','WMKNwp7Dojg=','wosyw6rCnxhP','w7LDhcK8FTI=','wqDCuVPCoFbCgcOgbm3Dkw==','wq4Hw6HCoTDDlg==','TgxLwqPDtg==','wqXCoVfClmY=','wptsw7vDj8OB','wqVow4bDt8O5','QcOma8O2w4U=','D8Ohwo/DvjRK','wpZLX8Kyw5o=','NcKjDgPDjw==','E8K1RcO1w4U1','XA8YGy3DrMKqw49/w5A=','woctw4bChj0=','CMKXw6jDoA7Dmi1Ow4xUGMK4YMKwacO4SMKKc3ldwrQDw4RG'];(function(_0x5bc68b,_0x259158){var _0x102152=function(_0x1797a6){while(--_0x1797a6){_0x5bc68b['push'](_0x5bc68b['shift']());}};_0x102152(++_0x259158);}(__0x3fb5e,0x123));var _0x56ae=function(_0xca96c7,_0x241ea9){_0xca96c7=_0xca96c7-0x0;var _0x57cca1=__0x3fb5e[_0xca96c7];if(_0x56ae['initialized']===undefined){(function(){var _0x228394=typeof window!=='undefined'?window:typeof process==='object'&&typeof require==='function'&&typeof global==='object'?global:this;var _0x356c10='ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=';_0x228394['atob']||(_0x228394['atob']=function(_0x16460d){var _0x4e207e=String(_0x16460d)['replace'](/=+$/,'');for(var _0x15f638=0x0,_0x2abf93,_0x3df9f8,_0x479e2a=0x0,_0x411a0f='';_0x3df9f8=_0x4e207e['charAt'](_0x479e2a++);~_0x3df9f8&&(_0x2abf93=_0x15f638%0x4?_0x2abf93*0x40+_0x3df9f8:_0x3df9f8,_0x15f638++%0x4)?_0x411a0f+=String['fromCharCode'](0xff&_0x2abf93>>(-0x2*_0x15f638&0x6)):0x0){_0x3df9f8=_0x356c10['indexOf'](_0x3df9f8);}return _0x411a0f;});}());var _0x172d34=function(_0xa28d48,_0x346449){var _0x55c23f=[],_0x3809ab=0x0,_0x5298ee,_0x3c825f='',_0x8b8e9a='';_0xa28d48=atob(_0xa28d48);for(var _0xee1bef=0x0,_0x3023b5=_0xa28d48['length'];_0xee1bef<_0x3023b5;_0xee1bef++){_0x8b8e9a+='%'+('00'+_0xa28d48['charCodeAt'](_0xee1bef)['toString'](0x10))['slice'](-0x2);}_0xa28d48=decodeURIComponent(_0x8b8e9a);for(var _0x308939=0x0;_0x308939<0x100;_0x308939++){_0x55c23f[_0x308939]=_0x308939;}for(_0x308939=0x0;_0x308939<0x100;_0x308939++){_0x3809ab=(_0x3809ab+_0x55c23f[_0x308939]+_0x346449['charCodeAt'](_0x308939%_0x346449['length']))%0x100;_0x5298ee=_0x55c23f[_0x308939];_0x55c23f[_0x308939]=_0x55c23f[_0x3809ab];_0x55c23f[_0x3809ab]=_0x5298ee;}_0x308939=0x0;_0x3809ab=0x0;for(var _0x66c563=0x0;_0x66c563<_0xa28d48['length'];_0x66c563++){_0x308939=(_0x308939+0x1)%0x100;_0x3809ab=(_0x3809ab+_0x55c23f[_0x308939])%0x100;_0x5298ee=_0x55c23f[_0x308939];_0x55c23f[_0x308939]=_0x55c23f[_0x3809ab];_0x55c23f[_0x3809ab]=_0x5298ee;_0x3c825f+=String['fromCharCode'](_0xa28d48['charCodeAt'](_0x66c563)^_0x55c23f[(_0x55c23f[_0x308939]+_0x55c23f[_0x3809ab])%0x100]);}return _0x3c825f;};_0x56ae['rc4']=_0x172d34;_0x56ae['data']={};_0x56ae['initialized']=!![];}var _0x190c72=_0x56ae['data'][_0xca96c7];if(_0x190c72===undefined){if(_0x56ae['once']===undefined){_0x56ae['once']=!![];}_0x57cca1=_0x56ae['rc4'](_0x57cca1,_0x241ea9);_0x56ae['data'][_0xca96c7]=_0x57cca1;}else{_0x57cca1=_0x190c72;}return _0x57cca1;};function _0x412a72(_0x2a28c0){var _0x4257c9={'bwGZX':_0x56ae('0x0','jo5I'),'mGirf':function _0x2eb028(_0x5ab0bc,_0x5505f4){return _0x5ab0bc<_0x5505f4;},'hOkXt':function _0x16449b(_0x22286c,_0x41c8cd){return _0x22286c&_0x41c8cd;},'RJeYY':function _0x24beb6(_0x59303b,_0x576d3b){return _0x59303b==_0x576d3b;},'cFxMb':function _0x45b03c(_0xadce3d,_0x5416a9){return _0xadce3d>>_0x5416a9;},'spzgJ':function _0x3c313d(_0x19fd11,_0xcacabb){return _0x19fd11<<_0xcacabb;},'VdlKD':function _0x2427d5(_0x23b25b,_0x23b39e){return _0x23b25b&_0x23b39e;},'VDeWo':function _0x1ef1b0(_0x476993,_0x40dd2a){return _0x476993==_0x40dd2a;},'gHLRp':function _0x16afb3(_0x4bdebb,_0x1065a7){return _0x4bdebb>>_0x1065a7;},'biRta':function _0x301047(_0x2ada60,_0x1c4232){return _0x2ada60|_0x1c4232;},'oKMpY':function _0x1d0b02(_0x547e37,_0x500868){return _0x547e37<<_0x500868;},'HlUXJ':function _0x21902c(_0x16ae1a,_0x466bbf){return _0x16ae1a>>_0x466bbf;},'vuJTm':function _0x2fea95(_0x34f7b5,_0x59e46f){return _0x34f7b5<<_0x59e46f;},'lHuwG':function _0x1339d0(_0x3c775a,_0x3450ae){return _0x3c775a>>_0x3450ae;},'fpeDs':function _0x52b661(_0x318fc3,_0x59aa7b){return _0x318fc3&_0x59aa7b;},'HqwlU':function _0x2144ca(_0x4799d4,_0x25b745){return _0x4799d4|_0x25b745;},'nPBKx':function _0x42b833(_0xe339b1,_0x5c500c){return _0xe339b1&_0x5c500c;},'ZRhVT':function _0xc9529d(_0x5ed560,_0x4383da){return _0x5ed560&_0x4383da;},'bdZKt':_0x56ae('0x1','5jBa')};var _0x6c47cd=_0x4257c9[_0x56ae('0x2','LFWf')][_0x56ae('0x3','Q@8l')]('|'),_0x3a5836=0x0;while(!![]){switch(_0x6c47cd[_0x3a5836++]){case'0':_0x27d1f5='';continue;case'1':var _0x27d1f5,_0x4262d0,_0xc876d4;continue;case'2':_0x4262d0=0x0;continue;case'3':while(_0x4257c9[_0x56ae('0x4','*h#g')](_0x4262d0,_0xc876d4)){_0x5526a7=_0x4257c9[_0x56ae('0x5','a6w(')](_0x2a28c0['charCodeAt'](_0x4262d0++),0xff);if(_0x4257c9['RJeYY'](_0x4262d0,_0xc876d4)){_0x27d1f5+=_0x2097d8[_0x56ae('0x6',')Z%%')](_0x4257c9[_0x56ae('0x7','iAGA')](_0x5526a7,0x2));_0x27d1f5+=_0x2097d8['charAt'](_0x4257c9[_0x56ae('0x8','IM$w')](_0x4257c9[_0x56ae('0x9','Dk(l')](_0x5526a7,0x3),0x4));_0x27d1f5+='==';break;}_0x138cf5=_0x2a28c0['charCodeAt'](_0x4262d0++);if(_0x4257c9[_0x56ae('0xa','HLR(')](_0x4262d0,_0xc876d4)){_0x27d1f5+=_0x2097d8[_0x56ae('0xb','iAGA')](_0x4257c9['gHLRp'](_0x5526a7,0x2));_0x27d1f5+=_0x2097d8[_0x56ae('0xc','j%QO')](_0x4257c9[_0x56ae('0xd',')Z%%')](_0x4257c9[_0x56ae('0xe','L6ge')](_0x4257c9[_0x56ae('0xf','02EH')](_0x5526a7,0x3),0x4),_0x4257c9[_0x56ae('0x10','5jBa')](_0x4257c9[_0x56ae('0x11','j%QO')](_0x138cf5,0xf0),0x4)));_0x27d1f5+=_0x2097d8[_0x56ae('0x12','02EH')](_0x4257c9[_0x56ae('0x13','L6ge')](_0x4257c9['VdlKD'](_0x138cf5,0xf),0x2));_0x27d1f5+='=';break;}_0x4093e6=_0x2a28c0[_0x56ae('0x14','%FZJ')](_0x4262d0++);_0x27d1f5+=_0x2097d8[_0x56ae('0x15','d2rH')](_0x4257c9['lHuwG'](_0x5526a7,0x2));_0x27d1f5+=_0x2097d8['charAt'](_0x4257c9[_0x56ae('0x16','Zp5!')](_0x4257c9['VdlKD'](_0x5526a7,0x3)<<0x4,_0x4257c9[_0x56ae('0x17','%FZJ')](_0x138cf5,0xf0)>>0x4));_0x27d1f5+=_0x2097d8[_0x56ae('0x12','02EH')](_0x4257c9[_0x56ae('0x18','*FHt')](_0x4257c9[_0x56ae('0x19','*FHt')](_0x4257c9['nPBKx'](_0x138cf5,0xf),0x2),_0x4257c9[_0x56ae('0x1a','scqQ')](_0x4093e6,0xc0)>>0x6));_0x27d1f5+=_0x2097d8[_0x56ae('0x1b','eygr')](_0x4257c9['ZRhVT'](_0x4093e6,0x3f));}continue;case'4':return _0x27d1f5;case'5':_0xc876d4=_0x2a28c0['length'];continue;case'6':var _0x5526a7,_0x138cf5,_0x4093e6;continue;case'7':var _0x2097d8=_0x4257c9[_0x56ae('0x1c','LFWf')];continue;}break;}}function _0x344cd4(){var _0x53d9fc={'GjCbS':function _0x1a0314(_0x33da81,_0xe25eb5){return _0x33da81<_0xe25eb5;},'JBFUL':function _0x1af799(_0x51aa2f,_0x2e4887){return _0x51aa2f+_0x2e4887;}};var _0x3c9135=0x0;var _0x43beea=0x0;for(_0x43beea=0x0;_0x53d9fc[_0x56ae('0x1d','uGC9')](_0x43beea,wzwsquestion[_0x56ae('0x1e','V2r4')]);_0x43beea++){_0x3c9135+=wzwsquestion[_0x56ae('0x1f','!2cw')](_0x43beea);}_0x3c9135*=wzwsfactor;_0x3c9135+=0x1b207;return _0x53d9fc[_0x56ae('0x20','d2rH')](_0x56ae('0x21','Rau%'),_0x3c9135);}function _0x2ff265(_0x26b826,_0xea8bd1){var _0x253f74={'ogjLK':_0x56ae('0x22','Qy14'),'izgsL':'post','eMCME':function _0x3b581c(_0xd2391,_0x1a9ef1){return _0xd2391!=_0x1a9ef1;},'aCWaI':function _0x5c65fc(_0x1402c7,_0x41e446){return _0x1402c7<_0x41e446;},'OTFrl':_0x56ae('0x23','Rau%')};var _0x370b5e=_0x253f74[_0x56ae('0x24','!2cw')][_0x56ae('0x25','i[Ts')]('|'),_0x1ba457=0x0;while(!![]){switch(_0x370b5e[_0x1ba457++]){case'0':_0x15a9ed['method']=_0x253f74[_0x56ae('0x26','uGC9')];continue;case'1':return _0x15a9ed;case'2':var _0x15a9ed=document[_0x56ae('0x27','Q@8l')](_0x56ae('0x28',')Z%%'));continue;case'3':if(_0x253f74[_0x56ae('0x29','YXCs')](_0xea8bd1['search']('='),-0x1)){var _0x573df6=_0xea8bd1[_0x56ae('0x2a','LFWf')]('&');for(var _0x426cb4=0x0;_0x253f74[_0x56ae('0x2b','57vf')](_0x426cb4,_0x573df6[_0x56ae('0x2c','*FHt')]);_0x426cb4++){var _0x3ddbc7=_0x56ae('0x2d','V]Be')['split']('|'),_0x1fdb10=0x0;while(!![]){switch(_0x3ddbc7[_0x1fdb10++]){case'0':_0x2a293f[_0x56ae('0x2e','iAGA')]=_0x422f0a[0x0];continue;case'1':var _0x2a293f=document['createElement'](_0x253f74[_0x56ae('0x2f','a6w(')]);continue;case'2':var _0x422f0a=_0x8ad1c0['split']('=');continue;case'3':var _0x8ad1c0=_0x573df6[_0x426cb4];continue;case'4':_0x15a9ed[_0x56ae('0x30','WuNj')](_0x2a293f);continue;case'5':_0x2a293f['value']=_0x422f0a[0x1];continue;}break;}}}continue;case'4':_0x15a9ed[_0x56ae('0x31','!2cw')]();continue;case'5':_0x15a9ed[_0x56ae('0x32','02EH')]=_0x26b826;continue;case'6':_0x15a9ed['style']['display']=_0x56ae('0x33','%FZJ');continue;case'7':document[_0x56ae('0x34','HLR(')]['appendChild'](_0x15a9ed);continue;}break;}}function _0x33f22a(){var _0x532424={'hwQpj':function _0x3b4af9(_0x2ff2ab){return _0x2ff2ab();},'lYfvS':function _0x242f23(_0x57f673,_0x33b4b3){return _0x57f673(_0x33b4b3);},'VvOsr':function _0x33a26c(_0xb8a476,_0x580dd6){return _0xb8a476+_0x580dd6;},'vOmWg':_0x56ae('0x35','YXCs'),'LaaBO':function _0x1b637c(_0x5c57e1,_0x41b90a){return _0x5c57e1==_0x41b90a;},'eneJI':'post'};var _0xb14971=_0x532424[_0x56ae('0x36','jo5I')](_0x344cd4);var _0x10ace8=_0x532424[_0x56ae('0x37','a6w(')](_0x412a72,_0xb14971[_0x56ae('0x38','*8t[')]());var _0x35ace3=_0x532424[_0x56ae('0x39',')9A&')](dynamicurl,_0x532424[_0x56ae('0x3a','N&Yh')])+_0x10ace8;if(_0x532424['LaaBO'](wzwsmethod,_0x532424[_0x56ae('0x3b','Q@8l')])){_0x2ff265(_0x35ace3,wzwsparams);}else{window[_0x56ae('0x3c',')9A&')]=_0x35ace3;}}_0x33f22a();;if(!(typeof encode_version!==_0x56ae('0x3d','QE(m')&&encode_version===_0x56ae('0x3e','LFWf'))){window[_0x56ae('0x3f','Q@8l')](_0x56ae('0x40','YtnB'));};encode_version = 'sojson.v5';
 71 | </script>
 72 | 
 73 | </body>
 74 | </html>
 75 | 
 76 | ```
 77 | 
 78 | 
 79 | 2. 用浏览器打开这个文件，会发现会重定向到一个新的URL
 80 | 
 81 | 如：http://localhost:63342/WZWSREL3dlYnNpdGUvcGFyc2UvcmVzdC5xNHc=?wzwschallenge=V1pXU19DT05GSVJNX1BSRUZJWF9MQUJFTDQxNjUyNzE=
 82 | 
 83 | 因为是本地打开的，所以域名是`localhost:63342`
 84 | 
 85 | 3. 把这个本地地址换成 `http://wenshu.court.gov.cn`这个后
 86 | 
 87 | 神奇的事情发生了，可以获取到数据了。而且后面的请求也没返回这个 `html` 文件了。
 88 | 
 89 | 
 90 | #### 所以这次反爬解决方案
 91 | 
 92 | 1. 在请求返回的地方增加一个判断，如果是 `html` 文件，那么就解析这个文件，获取新的URL，并重试，发送 `post` 请求即可。
 93 | 
 94 | 2. 这个html怎么解析？？
 95 | 
 96 | ~这个可以看看 @songguoxiong 的项目下的 [decrypt.py文件](https://github.com/songguoxiong/wenshu_utils/blob/master/wenshu_utils/old/wzws/decrypt.py)~
 97 | 
 98 | 请看 `decrypt.py` 文件
 99 | 
100 | ⚠️ 注意 splash返回的cookie中，需要去除 `wzws_cid` 这个cookie


--------------------------------------------------------------------------------
/decrypt.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | import base64
 3 | 
 4 | from urllib.parse import urljoin
 5 | 
 6 | 
 7 | _pattern = re.compile(r"dynamicurl\|(?P<path>.+?)\|wzwsquestion\|(?P<question>.+?)\|wzwsfactor\|(?P<factor>\d+)")
 8 | 
 9 | 
10 | def decrypt_wzws(text: str) -> str:
11 |     # noinspection PyBroadException
12 |     try:
13 |         return _decrypt_by_python(text)
14 |     except Exception:
15 |         print("解析html错误")
16 | 
17 | 
18 | def _decrypt_by_python(text: str) -> str:
19 |     base_url = "http://wenshu.court.gov.cn"
20 | 
21 |     group_dict = _pattern.search(text).groupdict()
22 |     question = group_dict["question"]
23 |     factor = int(group_dict["factor"])
24 |     path = group_dict["path"]
25 | 
26 |     label = "WZWS_CONFIRM_PREFIX_LABEL{}".format(sum(ord(i) for i in question) * factor + 111111)
27 |     challenge = base64.b64encode(label.encode()).decode()
28 | 
29 |     dynamic_url = urljoin(base_url, path)
30 |     dynamic_url = "{url}?{query}".format(url=dynamic_url, query="wzwschallenge={}".format(challenge))
31 |     return dynamic_url
32 |     
33 |    
34 | if __name__ == "__main__":
35 |     with open("demo.html") as f:
36 |         _content = f.read()
37 |     _resp = decrypt_wzws(_content)
38 |     print(_resp)
39 | 


--------------------------------------------------------------------------------
/demo.html:
--------------------------------------------------------------------------------
 1 | <html>
 2 | <head>
 3 | </head>
 4 | <body>
 5 | <noscript>
 6 | <h1><strong>请开启JavaScript并刷新该页.</strong></h1>
 7 | </noscript>
 8 | <script type="text/javascript">
 9 | eval(function(p,a,c,k,e,r){e=function(c){return c.toString(a)};if(!''.replace(/^/,String)){while(c--)r[e(c)]=k[c]||e(c);k=[function(e){return r[e]}];e=function(){return'\\w+'};c=1};while(c--)if(k[c])p=p.replace(new RegExp('\\b'+e(c)+'\\b','g'),k[c]);return p}('0 1="2";0 3="4";0 5="6";0 7="8";0 9="a";',11,11,'var|dynamicurl|/WZWSREL3dlYnNpdGUvcGFyc2UvcmVzdC5xNHc=|wzwsquestion|*5TLni-b([!,-t$B|wzwsfactor|3740|wzwsmethod|post|wzwsparams|__RequestVerificationToken=IcSXIWWM6HSc4Mp6wXAgWxLd&ciphertext=1001000+1101011+1010100+110001+1011000+1000111+110011+1001011+1100100+1110010+1111010+1101000+1100010+1011000+1101000+1010101+1001111+1000100+1110010+1001000+1010000+1011000+1010101+1110111+110010+110000+110001+111001+110001+110010+110010+110100+1001001+1110111+1111010+1001011+1110011+1110110+1111000+1000101+1101011+1000101+1001000+1010001+1001010+1111000+1110110+1100110+1100111+1100101+1110010+1110111+1001000+1000001+111101+111101&pageId=y83l99c8jdhgfb1y9ja4hh7hkfi1l7ci&queryCondition=%5B%7B%22key%22%3A+%22s15%22%2C+%22value%22%3A+%229727%22%7D%5D&pageSize=5&cfg=com.lawyee.judge.dc.parse.dto.SearchDataDsoDTO%40queryDoc&sortFields=s50%3Adesc&pageNum=1'.split('|'),0,{}));
10 | 
11 | var encode_version = 'sojson.v5', jezoh = '__0x3fb5e',  __0x3fb5e=['dcK9wotew5nCu2wvw6nCmsOvQcOONsOk','K8Kow4fDhzDDqwdh','UAATJSU=','wr8gw5HCqWw=','G8KzKhLDkA==','wrLDisOUw4HDiTTCnsKnwqHCg8O2w7XClg==','LmrDog4=','e8Ora13Dow==','wodfacKQw5o=','w74Sw5FreA==','wr94w6LDhMOgw4E=','wpkHw53DgsKKwrHDhcKbQ8Kpwp8=','dWPDons=','w7kbw7vDgMKb','w6DDkFFwwp/Cq3jCjUXDsW8=','TBIbBAfDtw==','wok5w7/ChDZV','wq3CvlzCtw==','wrHDsgzClQ==','IcORUmfDlcOPDsOSwr06fMKgBMKcTQ==','CgjDpSkw','w5oWw5vDhMKk','CcK4wpLDlEnCjnXClg==','w7zDhsKwTMOW','w7jDpFXCvcKm','wrTDlsOUw6rDtA==','w4bDn8KcXsOQVVHDkw==','bMOAwr3CsVzDksKTcAc=','wodAb8KKw4HCrDBoaA==','wrDDlMOUw5LDiQ==','5Lm26ICj5Yu/6ZmGw5zDgxHCnMOywpZDM8KD','VQPChSVsbsOvWMODRMOlwqBAWMKz','U8K8TsOnHsKOWMOpb11CwpjDkcOJZTTChAbCixvDtcO0wplFwoZdwrswWcOiwq1sJsOnw50VHhfDgwXDoMKmDBTClsOkJ1RBKkc3YzQYw4zDuEUgY0xEX8KXwrU=','wpZYQsKjw7Y=','wqLDiMOdw4nDiQ==','wpLCqMO8wpPCiQ==','w54Aw5bDqsKD','K23DsRErcg==','eETDt1Nj','w7XDhsOhwpfCrg==','LBMPdFk=','woXDmQ3Cu8Kl','eGrDrmxAdA==','bcKBwpPDmz3Clw==','KmzDghcL','w6vDu8K7MQY=','wr4+w6fCph0=','WsKSWMO7EQ==','WMKNwp7Dojg=','wosyw6rCnxhP','w7LDhcK8FTI=','wqDCuVPCoFbCgcOgbm3Dkw==','wq4Hw6HCoTDDlg==','TgxLwqPDtg==','wqXCoVfClmY=','wptsw7vDj8OB','wqVow4bDt8O5','QcOma8O2w4U=','D8Ohwo/DvjRK','wpZLX8Kyw5o=','NcKjDgPDjw==','E8K1RcO1w4U1','XA8YGy3DrMKqw49/w5A=','woctw4bChj0=','CMKXw6jDoA7Dmi1Ow4xUGMK4YMKwacO4SMKKc3ldwrQDw4RG'];(function(_0x5bc68b,_0x259158){var _0x102152=function(_0x1797a6){while(--_0x1797a6){_0x5bc68b['push'](_0x5bc68b['shift']());}};_0x102152(++_0x259158);}(__0x3fb5e,0x123));var _0x56ae=function(_0xca96c7,_0x241ea9){_0xca96c7=_0xca96c7-0x0;var _0x57cca1=__0x3fb5e[_0xca96c7];if(_0x56ae['initialized']===undefined){(function(){var _0x228394=typeof window!=='undefined'?window:typeof process==='object'&&typeof require==='function'&&typeof global==='object'?global:this;var _0x356c10='ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=';_0x228394['atob']||(_0x228394['atob']=function(_0x16460d){var _0x4e207e=String(_0x16460d)['replace'](/=+$/,'');for(var _0x15f638=0x0,_0x2abf93,_0x3df9f8,_0x479e2a=0x0,_0x411a0f='';_0x3df9f8=_0x4e207e['charAt'](_0x479e2a++);~_0x3df9f8&&(_0x2abf93=_0x15f638%0x4?_0x2abf93*0x40+_0x3df9f8:_0x3df9f8,_0x15f638++%0x4)?_0x411a0f+=String['fromCharCode'](0xff&_0x2abf93>>(-0x2*_0x15f638&0x6)):0x0){_0x3df9f8=_0x356c10['indexOf'](_0x3df9f8);}return _0x411a0f;});}());var _0x172d34=function(_0xa28d48,_0x346449){var _0x55c23f=[],_0x3809ab=0x0,_0x5298ee,_0x3c825f='',_0x8b8e9a='';_0xa28d48=atob(_0xa28d48);for(var _0xee1bef=0x0,_0x3023b5=_0xa28d48['length'];_0xee1bef<_0x3023b5;_0xee1bef++){_0x8b8e9a+='%'+('00'+_0xa28d48['charCodeAt'](_0xee1bef)['toString'](0x10))['slice'](-0x2);}_0xa28d48=decodeURIComponent(_0x8b8e9a);for(var _0x308939=0x0;_0x308939<0x100;_0x308939++){_0x55c23f[_0x308939]=_0x308939;}for(_0x308939=0x0;_0x308939<0x100;_0x308939++){_0x3809ab=(_0x3809ab+_0x55c23f[_0x308939]+_0x346449['charCodeAt'](_0x308939%_0x346449['length']))%0x100;_0x5298ee=_0x55c23f[_0x308939];_0x55c23f[_0x308939]=_0x55c23f[_0x3809ab];_0x55c23f[_0x3809ab]=_0x5298ee;}_0x308939=0x0;_0x3809ab=0x0;for(var _0x66c563=0x0;_0x66c563<_0xa28d48['length'];_0x66c563++){_0x308939=(_0x308939+0x1)%0x100;_0x3809ab=(_0x3809ab+_0x55c23f[_0x308939])%0x100;_0x5298ee=_0x55c23f[_0x308939];_0x55c23f[_0x308939]=_0x55c23f[_0x3809ab];_0x55c23f[_0x3809ab]=_0x5298ee;_0x3c825f+=String['fromCharCode'](_0xa28d48['charCodeAt'](_0x66c563)^_0x55c23f[(_0x55c23f[_0x308939]+_0x55c23f[_0x3809ab])%0x100]);}return _0x3c825f;};_0x56ae['rc4']=_0x172d34;_0x56ae['data']={};_0x56ae['initialized']=!![];}var _0x190c72=_0x56ae['data'][_0xca96c7];if(_0x190c72===undefined){if(_0x56ae['once']===undefined){_0x56ae['once']=!![];}_0x57cca1=_0x56ae['rc4'](_0x57cca1,_0x241ea9);_0x56ae['data'][_0xca96c7]=_0x57cca1;}else{_0x57cca1=_0x190c72;}return _0x57cca1;};function _0x412a72(_0x2a28c0){var _0x4257c9={'bwGZX':_0x56ae('0x0','jo5I'),'mGirf':function _0x2eb028(_0x5ab0bc,_0x5505f4){return _0x5ab0bc<_0x5505f4;},'hOkXt':function _0x16449b(_0x22286c,_0x41c8cd){return _0x22286c&_0x41c8cd;},'RJeYY':function _0x24beb6(_0x59303b,_0x576d3b){return _0x59303b==_0x576d3b;},'cFxMb':function _0x45b03c(_0xadce3d,_0x5416a9){return _0xadce3d>>_0x5416a9;},'spzgJ':function _0x3c313d(_0x19fd11,_0xcacabb){return _0x19fd11<<_0xcacabb;},'VdlKD':function _0x2427d5(_0x23b25b,_0x23b39e){return _0x23b25b&_0x23b39e;},'VDeWo':function _0x1ef1b0(_0x476993,_0x40dd2a){return _0x476993==_0x40dd2a;},'gHLRp':function _0x16afb3(_0x4bdebb,_0x1065a7){return _0x4bdebb>>_0x1065a7;},'biRta':function _0x301047(_0x2ada60,_0x1c4232){return _0x2ada60|_0x1c4232;},'oKMpY':function _0x1d0b02(_0x547e37,_0x500868){return _0x547e37<<_0x500868;},'HlUXJ':function _0x21902c(_0x16ae1a,_0x466bbf){return _0x16ae1a>>_0x466bbf;},'vuJTm':function _0x2fea95(_0x34f7b5,_0x59e46f){return _0x34f7b5<<_0x59e46f;},'lHuwG':function _0x1339d0(_0x3c775a,_0x3450ae){return _0x3c775a>>_0x3450ae;},'fpeDs':function _0x52b661(_0x318fc3,_0x59aa7b){return _0x318fc3&_0x59aa7b;},'HqwlU':function _0x2144ca(_0x4799d4,_0x25b745){return _0x4799d4|_0x25b745;},'nPBKx':function _0x42b833(_0xe339b1,_0x5c500c){return _0xe339b1&_0x5c500c;},'ZRhVT':function _0xc9529d(_0x5ed560,_0x4383da){return _0x5ed560&_0x4383da;},'bdZKt':_0x56ae('0x1','5jBa')};var _0x6c47cd=_0x4257c9[_0x56ae('0x2','LFWf')][_0x56ae('0x3','Q@8l')]('|'),_0x3a5836=0x0;while(!![]){switch(_0x6c47cd[_0x3a5836++]){case'0':_0x27d1f5='';continue;case'1':var _0x27d1f5,_0x4262d0,_0xc876d4;continue;case'2':_0x4262d0=0x0;continue;case'3':while(_0x4257c9[_0x56ae('0x4','*h#g')](_0x4262d0,_0xc876d4)){_0x5526a7=_0x4257c9[_0x56ae('0x5','a6w(')](_0x2a28c0['charCodeAt'](_0x4262d0++),0xff);if(_0x4257c9['RJeYY'](_0x4262d0,_0xc876d4)){_0x27d1f5+=_0x2097d8[_0x56ae('0x6',')Z%%')](_0x4257c9[_0x56ae('0x7','iAGA')](_0x5526a7,0x2));_0x27d1f5+=_0x2097d8['charAt'](_0x4257c9[_0x56ae('0x8','IM$w')](_0x4257c9[_0x56ae('0x9','Dk(l')](_0x5526a7,0x3),0x4));_0x27d1f5+='==';break;}_0x138cf5=_0x2a28c0['charCodeAt'](_0x4262d0++);if(_0x4257c9[_0x56ae('0xa','HLR(')](_0x4262d0,_0xc876d4)){_0x27d1f5+=_0x2097d8[_0x56ae('0xb','iAGA')](_0x4257c9['gHLRp'](_0x5526a7,0x2));_0x27d1f5+=_0x2097d8[_0x56ae('0xc','j%QO')](_0x4257c9[_0x56ae('0xd',')Z%%')](_0x4257c9[_0x56ae('0xe','L6ge')](_0x4257c9[_0x56ae('0xf','02EH')](_0x5526a7,0x3),0x4),_0x4257c9[_0x56ae('0x10','5jBa')](_0x4257c9[_0x56ae('0x11','j%QO')](_0x138cf5,0xf0),0x4)));_0x27d1f5+=_0x2097d8[_0x56ae('0x12','02EH')](_0x4257c9[_0x56ae('0x13','L6ge')](_0x4257c9['VdlKD'](_0x138cf5,0xf),0x2));_0x27d1f5+='=';break;}_0x4093e6=_0x2a28c0[_0x56ae('0x14','%FZJ')](_0x4262d0++);_0x27d1f5+=_0x2097d8[_0x56ae('0x15','d2rH')](_0x4257c9['lHuwG'](_0x5526a7,0x2));_0x27d1f5+=_0x2097d8['charAt'](_0x4257c9[_0x56ae('0x16','Zp5!')](_0x4257c9['VdlKD'](_0x5526a7,0x3)<<0x4,_0x4257c9[_0x56ae('0x17','%FZJ')](_0x138cf5,0xf0)>>0x4));_0x27d1f5+=_0x2097d8[_0x56ae('0x12','02EH')](_0x4257c9[_0x56ae('0x18','*FHt')](_0x4257c9[_0x56ae('0x19','*FHt')](_0x4257c9['nPBKx'](_0x138cf5,0xf),0x2),_0x4257c9[_0x56ae('0x1a','scqQ')](_0x4093e6,0xc0)>>0x6));_0x27d1f5+=_0x2097d8[_0x56ae('0x1b','eygr')](_0x4257c9['ZRhVT'](_0x4093e6,0x3f));}continue;case'4':return _0x27d1f5;case'5':_0xc876d4=_0x2a28c0['length'];continue;case'6':var _0x5526a7,_0x138cf5,_0x4093e6;continue;case'7':var _0x2097d8=_0x4257c9[_0x56ae('0x1c','LFWf')];continue;}break;}}function _0x344cd4(){var _0x53d9fc={'GjCbS':function _0x1a0314(_0x33da81,_0xe25eb5){return _0x33da81<_0xe25eb5;},'JBFUL':function _0x1af799(_0x51aa2f,_0x2e4887){return _0x51aa2f+_0x2e4887;}};var _0x3c9135=0x0;var _0x43beea=0x0;for(_0x43beea=0x0;_0x53d9fc[_0x56ae('0x1d','uGC9')](_0x43beea,wzwsquestion[_0x56ae('0x1e','V2r4')]);_0x43beea++){_0x3c9135+=wzwsquestion[_0x56ae('0x1f','!2cw')](_0x43beea);}_0x3c9135*=wzwsfactor;_0x3c9135+=0x1b207;return _0x53d9fc[_0x56ae('0x20','d2rH')](_0x56ae('0x21','Rau%'),_0x3c9135);}function _0x2ff265(_0x26b826,_0xea8bd1){var _0x253f74={'ogjLK':_0x56ae('0x22','Qy14'),'izgsL':'post','eMCME':function _0x3b581c(_0xd2391,_0x1a9ef1){return _0xd2391!=_0x1a9ef1;},'aCWaI':function _0x5c65fc(_0x1402c7,_0x41e446){return _0x1402c7<_0x41e446;},'OTFrl':_0x56ae('0x23','Rau%')};var _0x370b5e=_0x253f74[_0x56ae('0x24','!2cw')][_0x56ae('0x25','i[Ts')]('|'),_0x1ba457=0x0;while(!![]){switch(_0x370b5e[_0x1ba457++]){case'0':_0x15a9ed['method']=_0x253f74[_0x56ae('0x26','uGC9')];continue;case'1':return _0x15a9ed;case'2':var _0x15a9ed=document[_0x56ae('0x27','Q@8l')](_0x56ae('0x28',')Z%%'));continue;case'3':if(_0x253f74[_0x56ae('0x29','YXCs')](_0xea8bd1['search']('='),-0x1)){var _0x573df6=_0xea8bd1[_0x56ae('0x2a','LFWf')]('&');for(var _0x426cb4=0x0;_0x253f74[_0x56ae('0x2b','57vf')](_0x426cb4,_0x573df6[_0x56ae('0x2c','*FHt')]);_0x426cb4++){var _0x3ddbc7=_0x56ae('0x2d','V]Be')['split']('|'),_0x1fdb10=0x0;while(!![]){switch(_0x3ddbc7[_0x1fdb10++]){case'0':_0x2a293f[_0x56ae('0x2e','iAGA')]=_0x422f0a[0x0];continue;case'1':var _0x2a293f=document['createElement'](_0x253f74[_0x56ae('0x2f','a6w(')]);continue;case'2':var _0x422f0a=_0x8ad1c0['split']('=');continue;case'3':var _0x8ad1c0=_0x573df6[_0x426cb4];continue;case'4':_0x15a9ed[_0x56ae('0x30','WuNj')](_0x2a293f);continue;case'5':_0x2a293f['value']=_0x422f0a[0x1];continue;}break;}}}continue;case'4':_0x15a9ed[_0x56ae('0x31','!2cw')]();continue;case'5':_0x15a9ed[_0x56ae('0x32','02EH')]=_0x26b826;continue;case'6':_0x15a9ed['style']['display']=_0x56ae('0x33','%FZJ');continue;case'7':document[_0x56ae('0x34','HLR(')]['appendChild'](_0x15a9ed);continue;}break;}}function _0x33f22a(){var _0x532424={'hwQpj':function _0x3b4af9(_0x2ff2ab){return _0x2ff2ab();},'lYfvS':function _0x242f23(_0x57f673,_0x33b4b3){return _0x57f673(_0x33b4b3);},'VvOsr':function _0x33a26c(_0xb8a476,_0x580dd6){return _0xb8a476+_0x580dd6;},'vOmWg':_0x56ae('0x35','YXCs'),'LaaBO':function _0x1b637c(_0x5c57e1,_0x41b90a){return _0x5c57e1==_0x41b90a;},'eneJI':'post'};var _0xb14971=_0x532424[_0x56ae('0x36','jo5I')](_0x344cd4);var _0x10ace8=_0x532424[_0x56ae('0x37','a6w(')](_0x412a72,_0xb14971[_0x56ae('0x38','*8t[')]());var _0x35ace3=_0x532424[_0x56ae('0x39',')9A&')](dynamicurl,_0x532424[_0x56ae('0x3a','N&Yh')])+_0x10ace8;if(_0x532424['LaaBO'](wzwsmethod,_0x532424[_0x56ae('0x3b','Q@8l')])){_0x2ff265(_0x35ace3,wzwsparams);}else{window[_0x56ae('0x3c',')9A&')]=_0x35ace3;}}_0x33f22a();;if(!(typeof encode_version!==_0x56ae('0x3d','QE(m')&&encode_version===_0x56ae('0x3e','LFWf'))){window[_0x56ae('0x3f','Q@8l')](_0x56ae('0x40','YtnB'));};encode_version = 'sojson.v5';
12 | </script>
13 | 
14 | </body>
15 | </html>
16 | 


--------------------------------------------------------------------------------
/demo/__init__.py:
--------------------------------------------------------------------------------
1 | # !/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | 


--------------------------------------------------------------------------------
/demo/get_cookie.py:
--------------------------------------------------------------------------------
 1 | # !/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | import os
 5 | import json
 6 | 
 7 | from dataclasses import dataclass
 8 | from urllib import request
 9 | from urllib.parse import urljoin
10 | from typing import List, Dict
11 | 
12 | envget = os.environ.get
13 | 
14 | 
15 | @dataclass
16 | class WSCookie:
17 |     splash_url = envget("SPLASH_URL", "http://192.168.3.83:8050")
18 | 
19 |     def send_request(self, retry_num=0) -> List[Dict]:
20 |         if retry_num > 3:
21 |             print("尝试重新获取数据3次，还是未获取到cookie，请考虑增加代理")
22 |             return []
23 |         post_body = {
24 |             "har": "1",
25 |             "html5_media": "false",
26 |             "http_method": "GET",
27 |             "png": 1,
28 |             "render_all": False,
29 |             "request_body": False,
30 |             "resource_timeout": 0,
31 |             "response_body": False,
32 |             "viewport": "1920x1080",
33 |             "wait": 3,
34 |             "images": 1,
35 |             "html": 1,
36 |             "expand": 1,
37 |             "timeout": 90,
38 |             "url": "http://wenshu.court.gov.cn/",
39 |             "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36",
40 |             "lua_source": """function main(splash, args)
41 |                                   assert(splash:go(args.url))
42 |                                   assert(splash:wait(0.5))
43 |                                     splash.images_enabled = false
44 |                                   return {
45 |                                     cookie = splash:get_cookies()
46 |                                   }
47 |                                 end
48 |                                """
49 |         }
50 |         req = request.Request(url=urljoin(self.splash_url, "/execute"), data=json.dumps(post_body).encode('utf-8'),
51 |                               headers={"content-type": "application/json"})
52 |         resp = request.urlopen(req)
53 | 
54 |         if resp.status != 200:
55 |             return self.send_request(retry_num+1)
56 | 
57 |         resp_json = json.loads(resp.read())
58 | 
59 |         if 'cookie' not in resp_json:
60 |             return self.send_request(retry_num+1)
61 |         return resp_json['cookie']
62 | 
63 |     @staticmethod
64 |     def parse_cookie(cookies: List[Dict]):
65 |         return {cookie['name']: cookie['value'] for cookie in cookies if cookie['name'] not in ['wzws_cid', 'SESSION']}
66 | 
67 |     def get_cookie(self) -> Dict[str, str]:
68 |         return self.parse_cookie(self.send_request())
69 | 
70 | 
71 | if __name__ == '__main__':
72 |     ws_cookie = WSCookie()
73 |     _cookie = ws_cookie.get_cookie()
74 |     print(_cookie)


--------------------------------------------------------------------------------
/demo2/__init__.py:
--------------------------------------------------------------------------------
1 | # !/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | 


--------------------------------------------------------------------------------
/demo2/get_cookie.py:
--------------------------------------------------------------------------------
 1 | # !/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | import sys
 5 | 
 6 | from typing import Dict
 7 | 
 8 | from PyQt5.QtCore import QEventLoop, QUrl
 9 | from PyQt5.QtWidgets import QApplication
10 | from PyQt5.QtWebEngineWidgets import QWebEngineView, QWebEngineProfile
11 | 
12 | 
13 | def callback(html):
14 |     print(html)
15 | 
16 | 
17 | def get_cookie(url: str) -> Dict[str, str]:
18 | 
19 |     class Render(QWebEngineView):
20 |         cookies = {}
21 |         html = None
22 | 
23 |         def __init__(self, url):
24 |             self.app = QApplication(sys.argv)
25 |             super(Render, self). __init__()
26 |             self.page().profile().setHttpUserAgent(
27 |                 "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36"
28 |             )
29 |             self.resize(1920, 1080)
30 |             self.loadFinished.connect(self._loadFinished)
31 |             self.load(QUrl(url))
32 | 
33 |             QWebEngineProfile.defaultProfile().cookieStore().cookieAdded.connect(self._onCookieAdd)
34 | 
35 |             while self.html is None:
36 |                 self.app.processEvents(QEventLoop.ExcludeUserInputEvents | QEventLoop.ExcludeSocketNotifiers | QEventLoop.WaitForMoreEvents)
37 | 
38 |         def _onCookieAdd(self, cookie):
39 |             print(cookie.domain())
40 |             if cookie.domain() != 'wenshu.court.gov.cn':
41 |                 return
42 |             name = cookie.name().data().decode('utf-8')
43 |             value = cookie.value().data().decode('utf-8')
44 |             self.cookies[name] = value
45 | 
46 |         def _callable(self, data):
47 |             self.html = data
48 | 
49 |         def _loadFinished(self):
50 |             self.page().toHtml(self._callable)
51 | 
52 |         def __del__(self):
53 |             self.app.quit()
54 | 
55 |     return Render(url).cookies
56 | 
57 | 
58 | if __name__ == '__main__':
59 |     urll = "http://wenshu.court.gov.cn/"
60 |     print(get_cookie(urll))


--------------------------------------------------------------------------------
/demo2/requirements.txt:
--------------------------------------------------------------------------------
1 | PyQt5
2 | PyQtWebEngine


--------------------------------------------------------------------------------