├── .gitignore ├── LICENSE ├── README.rst ├── _config.yml ├── botflow ├── __init__.py ├── base.py ├── bdata.py ├── bot.py ├── botbase.py ├── botflow.py ├── botframe.py ├── config.py ├── ex │ ├── __init__.py │ ├── aiofile.py │ └── http.py ├── function.py ├── functionbase.py ├── pipe.py ├── queue.py ├── route.py └── routebase.py ├── docs ├── Makefile ├── _config.yml ├── bitcoin_arbitrage.png ├── bot │ ├── index.rst │ ├── replay.rst │ └── run.rst ├── change │ ├── 0.1.8.rst │ ├── 0.1.9.rst │ ├── 0.2.0.rst │ └── index.rst ├── conf.py ├── examples │ ├── index.rst │ ├── lagou1.jpg │ ├── lagou2.jpg │ ├── lagou3.jpg │ ├── lagou5.jpg │ └── lagou_spider.rst ├── faq.rst ├── index.rst ├── make.bat ├── node.rst ├── pipe.rst ├── route.rst └── route │ ├── databot_branch.jpg │ ├── databot_fork.jpg │ ├── databot_join.jpg │ └── databot_return.jpg ├── examples ├── Zip.py ├── aiohttpserver_hello.py ├── aiohttpserver_search.py ├── aiohttpserver_websocket.py ├── amazon_book.ipynb ├── amazon_book.py ├── async.py ├── backpressure.py ├── baidu_spider.py ├── baidu_spider_progress.py ├── basic.py ├── bitcoin_ticker.py ├── bitcoin_ticker_stream.py ├── boosttype.py ├── chainable.py ├── crawler.py ├── ex_output │ └── README.rst ├── lagou_crawler.py ├── loop3.py ├── perf_test │ ├── aiohttpserver.py │ └── httpclinet.py ├── replayex.py ├── requirements.txt ├── simple_bitcoin_price.py └── zip_join.py ├── requirements.txt ├── setup.py └── tests ├── __init__.py ├── objet_ini.py ├── setup.cfg.py ├── test_backpressure.py.bak ├── test_node.py ├── test_python_lang.py ├── test_queue.py ├── test_route.py ├── test_stop.py.bak └── type_hint.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | 
parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | #pickle file 107 | .pk 108 | 109 | .idea/* 110 | 111 | ex_output/* 112 | !ex_output/README.rst -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015-2016 Ask Solem & contributors. All rights reserved. 2 | Copyright (c) 2012-2014 GoPivotal, Inc. All rights reserved. 3 | Copyright (c) 2009, 2010, 2011, 2012 Ask Solem, and individual contributors. All rights reserved. 
4 | 5 | Celery is licensed under The BSD License (3 Clause, also known as 6 | the new BSD license). The license is an OSI approved Open Source 7 | license and is GPL-compatible(1). 8 | 9 | The license text can also be found here: 10 | http://www.opensource.org/licenses/BSD-3-Clause 11 | 12 | License 13 | ======= 14 | 15 | Redistribution and use in source and binary forms, with or without 16 | modification, are permitted provided that the following conditions are met: 17 | * Redistributions of source code must retain the above copyright 18 | notice, this list of conditions and the following disclaimer. 19 | * Redistributions in binary form must reproduce the above copyright 20 | notice, this list of conditions and the following disclaimer in the 21 | documentation and/or other materials provided with the distribution. 22 | * Neither the name of Ask Solem, nor the 23 | names of its contributors may be used to endorse or promote products 24 | derived from this software without specific prior written permission. 25 | 26 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 27 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 28 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 29 | PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL Ask Solem OR CONTRIBUTORS 30 | BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 31 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 32 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 33 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 34 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 35 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 | POSSIBILITY OF SUCH DAMAGE. 
37 | 38 | Documentation License 39 | ===================== 40 | 41 | The documentation portion of Celery (the rendered contents of the 42 | "docs" directory of a software distribution or checkout) is supplied 43 | under the "Creative Commons Attribution-ShareAlike 4.0 44 | International" (CC BY-SA 4.0) License as described by 45 | https://creativecommons.org/licenses/by-sa/4.0/ 46 | 47 | Footnotes 48 | ========= 49 | (1) A GPL-compatible license makes it possible to 50 | combine Celery with other software that is released 51 | under the GPL, it does not mean that we're distributing 52 | Celery under the GPL license. The BSD license, unlike the GPL, 53 | let you distribute a modified version without making your 54 | changes open source. -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | ======= 2 | Botflow 3 | ======= 4 | 0.2.0 alpha 5 | 6 | 7 | * Dataflow programming framework 8 | * Paralleled in coroutines and ThreadPool 9 | * Type- and content-based route function 10 | * Interactive programming with Jupyter Notebook 11 | 12 | Requirements 13 | ------------ 14 | `Graphviz `_. 15 | 16 | Installing 17 | ---------- 18 | 19 | Install and update using ``pip``: 20 | 21 | `pip install -U botflow` 22 | 23 | Documentation 24 | ------------ 25 | 26 | http://botflow.readthedocs.io 27 | 28 | 29 | 30 | 31 | What's dataflow programming? 32 | =============================== 33 | 34 | All functions are connected by pipes (queues) and communicate by data. 35 | 36 | When data come in, the function will be called and return the result. 37 | 38 | Think about the pipeline operation in unix: ``ls|grep|sed``. 39 | 40 | Benefits: 41 | 42 | #. Decouple data and functionality 43 | #. Easy to reuse 44 | 45 | Botflow provides pipe and route. It makes dataflow programming and powerful data flow processes easier. 46 | 47 | 48 | Botflow is... 
49 | ============= 50 | 51 | - **Simple** 52 | 53 | Botflow is easy to use and maintain, *does not need configuration files*, and knows about ``asyncio`` and how to parallelize computation. 54 | 55 | Here's one of the simple applications you can make: 56 | 57 | _Load the price of Bitcoin every 2 seconds. An advanced price-aggregator sample can be found `here `_. 58 | 59 | 60 | .. code-block:: python 61 | 62 | from botflow import * 63 | 64 | 65 | def main(): 66 | Pipe( 67 | 68 | Timer(delay=2), # send timer data to pipe every 2 seconds 69 | "http://api.coindesk.com/v1/bpi/currentprice.json", # send url to pipe when timer trigger 70 | HttpLoader(), # read url and load http response 71 | lambda r: r.json['bpi']['USD']['rate_float'], # read http response and parse as json 72 | print, # print out 73 | 74 | ) 75 | 76 | Bot.render('ex_output/simple_bitcoin_price') 77 | Bot.run() 78 | main() 79 | 80 | **Or write in chain style** 81 | 82 | 83 | .. code-block:: python 84 | 85 | 86 | 87 | from botflow import * 88 | p_cd_bitcoin=Pipe().Timer(delay=2).Loop("http://api.coindesk.com/v1/bpi/currentprice.json")\ 89 | .HttpLoader().Map(lambda r: r.json['bpi']['USD']['rate_float']).Map(print) 90 | 91 | p_cd_bitcoin.run() 92 | 93 | 94 | 95 | 96 | - **Flow Graph** 97 | With the render function: 98 | `Bot.render('bitcoin_arbitrage')` 99 | Botflow will render the data flow network into a graphviz image. 100 | Below is the flow graph generated by Botflow, aggregating the bitcoin prices of 6 exchanges for trading. 101 | 102 | 103 | .. image:: docs/bitcoin_arbitrage.png 104 | :width: 400 105 | 106 | 107 | 108 | 109 | - **Fast** 110 | Nodes will be run in parallel, and they will perform well when processing stream data. 111 | :Web Crawler: Botflow is 10x faster than Scrapy 112 | 113 | 114 | 115 | 116 | - **Replay-able** 117 | 118 | With replay mode enabled: 119 | ``config.replay_mode=True`` 120 | when an exception is raised at step N, you don't need to run from step 1 to N.
121 | Botflow will replay the data from the nearest completed node, usually step N-1. 122 | It will save a lot of time in the development phase. 123 | 124 | Release 125 | 126 | :**0.2.0**: Milestone release.: 127 | 128 | # Jupyter support. Able to run inside a Jupyter notebook. 129 | 130 | # A pipe can be nested in another Pipe. 131 | 132 | 133 | p1=Pipe(get_image) 134 | p2=Pipe(get_price) 135 | p_get_all=Pipe(Zip(p1,p2)).Filter 136 | 137 | # Support chain-style pipeline creation. 138 | 139 | Pipe(range(1,10)).Map(lambda x:x+1).Filter(lambda x:x>2) 140 | 141 | same as: 142 | 143 | Pipe(range(1,10),lambda x:x+1,Filter(lambda x:x>2)) 144 | 145 | 146 | 147 | :**0.1.9**: Major change, see below.: 148 | 149 | # Backpressure rate limit support 150 | 151 | # Httpserver support 152 | 153 | # New Node support: *Zip*, *SendTo*, *Flat* for making loops and redirecting the flow 154 | 155 | # Type hints support, for function-type routing 156 | 157 | # Reorganized the source code for readability. 158 | 159 | 160 | :**0.1.8**: http://docs.botflow.org/en/latest/change/0.1.8.html .: 161 | 162 | #. Support parallel in ThreadPool for slow functions. 163 | 164 | #. Loop Node is deprecated. Raw values and Iterable values can be used directly. 165 | 166 | #. Improve performance of BlockedJoin 167 | 168 | :**0.1.7**: 169 | 170 | 171 | RoadMap 172 | ======= 173 | - Will add Httpserver support (REST, Websocket). 174 | - Will support serving machine learning models online. 175 | - Finish the API reference doc. 176 | - Rename project to Botflow? 177 | 178 | More about Botflow 179 | =============== 180 | 181 | Dataflow 182 | programming is typically applied to streams of structured data for filtering, transforming, aggregating (such as computing statistics), or calling other programs. 183 | 184 | Botflow has a few basic concepts to implement Dataflow programming. 185 | 186 | - **Source** 187 | It feeds stream data into the pipe. 188 | 189 | * **Timer**: It will send a message into the pipe as configured by its timer params:
**delay**, **max_time**, **until** (stop when a condition is finished) 190 | * **Pipe.run**: you can use Pipe.run to trigger the data into the pipe. By default it will feed the int **0** 191 | 192 | 193 | 194 | - **Function** 195 | It is a callable unit. Any callable function or object can work as a Node. It is driven by data. Custom functions work as a Map unit. 196 | There are some built-in nodes: 197 | 198 | 199 | 200 | * **Fetch**: (Alias: HttpLoader) Get a URL and return the HTTP response 201 | * **AioFile**: for file I/O. 202 | * **SpeedLimit**: limit the stream's speed 203 | * **Delay**: delay for the specified seconds. 204 | * **Map** : Works as a convert unit. 205 | * **Filter** : Drop data from the pipe if it does not match some condition 206 | * **Flat** : Flatten list data from the pipe into separate items 207 | 208 | 209 | - **Route** 210 | It will be used to create a complex data flow network, not just one main process. Botflow can nest Routes inside Routes. 211 | It is a powerful concept. 212 | There are some pre-built-in Routes: 213 | * **Pipe**: It is the main stream process of the program. All units will work inside. 214 | * **Tee** : (Alias: Branch) Duplicate data from the parent pipe to a child pipe as a branch. 215 | * **Zip** : Combine multiple pipes' results into a list. 216 | * **Link**: (Alias: LinkTo) Route flow to any Node or Route for making loops and circles 217 | 218 | 219 | All units (Pipe, Node, Route) communicate via queues and perform parallel computation in coroutines. 220 | This is abstracted so that Botflow can be used with only limited knowledge of ``asyncio``.
221 | 222 | 223 | 224 | 225 | Contributing 226 | ------------ 227 | 228 | 229 | Donate 230 | ------ 231 | 232 | 233 | Links 234 | ----- 235 | -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-minimal -------------------------------------------------------------------------------- /botflow/__init__.py: -------------------------------------------------------------------------------- 1 | from .route import Timer,Branch,Join,Link,Zip 2 | from .pipe import Pipe 3 | from .function import Function,Filter,Delay,SpeedLimit,Flat,ToText 4 | from . import route 5 | from botflow.ex.aiofile import AioFile 6 | from .bdata import Bdata,Databoard 7 | from .botflow import BotFlow 8 | from botflow.ex.http import HttpRequest,HttpLoader,HttpResponse,HttpServer,HttpAck 9 | 10 | from .config import config 11 | Bot=BotFlow 12 | 13 | __all__ = ["Pipe","Timer","Branch","Join","Zip","HttpRequest", 14 | "HttpLoader", "AioFile", "route", 15 | "Bdata","HttpServer","BotFlow","Bot","Delay","SpeedLimit","Link","Function","Flat","ToText"] 16 | 17 | -------------------------------------------------------------------------------- /botflow/base.py: -------------------------------------------------------------------------------- 1 | 2 | import asyncio 3 | from .config import config 4 | import asyncio 5 | import types 6 | # import uvloop 7 | # asyncio.set_event_loop_policy(uvloop.EventLoopPolicy()) 8 | 9 | class Singleton(type): 10 | _instances = {} 11 | def __call__(cls, *args, **kwargs): 12 | if cls not in cls._instances: 13 | cls._instances[cls] = super(Singleton, cls).__call__(*args, **kwargs) 14 | return cls._instances[cls] 15 | 16 | def list_included(iq, oq): 17 | if not isinstance(iq, list): 18 | iq = [iq] 19 | if not isinstance(oq, list): 20 | oq = [oq] 21 | 22 | for q in iq: 23 | for _oq in oq: 24 | if q is _oq: 25 | return True 26 | return False 
27 | 28 | 29 | class CountRef(object): 30 | __slots__ = ['count'] 31 | def __init__(self): 32 | self.count=0 33 | 34 | def incr(self,n=1): 35 | self.count=self.count+n 36 | return self.count 37 | 38 | def decr(self): 39 | 40 | self.count=self.count-1 41 | 42 | return self.count 43 | 44 | 45 | async def copy_size( q): 46 | data_list = [] 47 | t = await q.get() 48 | data_list.append(t) 49 | 50 | qsize = q.qsize() 51 | # get branch size without wait. 52 | 53 | count = 0 54 | while qsize > 0: 55 | try: 56 | t = q.get_nowait() 57 | except asyncio.queues.QueueEmpty: 58 | 59 | break 60 | 61 | data_list.append(t) 62 | count += 1 63 | if count >= qsize or count >= config.coroutine_batch_size: 64 | break 65 | return data_list 66 | 67 | 68 | def flatten(d): 69 | for x in d: 70 | if hasattr(x, '__iter__') and isinstance(x, (list,types.GeneratorType)): 71 | for y in flatten(x): 72 | yield y 73 | else: 74 | yield x 75 | 76 | 77 | 78 | class BotExit(Exception): 79 | pass 80 | 81 | 82 | _BOT_LOOP=asyncio.new_event_loop() 83 | 84 | def get_loop(): 85 | # _BOT_LOOP.set_debug(enabled=True) 86 | return _BOT_LOOP 87 | -------------------------------------------------------------------------------- /botflow/bdata.py: -------------------------------------------------------------------------------- 1 | from .base import CountRef,Singleton,get_loop 2 | 3 | import asyncio 4 | import logging 5 | 6 | import uuid 7 | 8 | class BotControl(object): 9 | pass 10 | 11 | 12 | class Retire(BotControl): 13 | pass 14 | 15 | 16 | class Suspend(BotControl): 17 | pass 18 | 19 | 20 | class Resume(BotControl): 21 | pass 22 | 23 | 24 | class ChangeIq(BotControl): 25 | 26 | def __init__(self, iq_num=128): 27 | self.iq_num = iq_num 28 | 29 | 30 | DATA_ADD=1 31 | DATA_REMOVE=2 32 | DATA_COMPLETE=3 33 | 34 | class Databoard(object,metaclass=Singleton): 35 | 36 | 37 | 38 | 39 | def __init__(self): 40 | self._datatrack = {} 41 | self._futures = {} 42 | self.debug=True 43 | #self.buffer=[] 44 | 45 | # 
self.buffer=[] 46 | def debug_print(self): 47 | for k,v in self._datatrack.items(): 48 | if type(k) == int: 49 | print("Databoard datatrack {},len:{}".format(k)) 50 | else: 51 | print("Databoard datatrack {},len:{}".format(k,len(v))) 52 | for k,v in self._futures.items(): 53 | print("Databoard future {},len:{}".format(k,len(v))) 54 | def check_compeleted(self,ori): 55 | for k, v in self._datatrack[ori].items(): 56 | if v == DATA_ADD: 57 | return False 58 | 59 | result = [] 60 | for k, v in self._datatrack[ori].items(): 61 | if v == DATA_COMPLETE: 62 | result.append(k.data) 63 | 64 | return result 65 | 66 | 67 | 68 | def _check_aweak(self, ori): 69 | 70 | if ori not in self._futures: 71 | return 72 | result=self.check_compeleted(ori) 73 | if result== False: 74 | return 75 | else: 76 | 77 | 78 | if not self._futures[ori].done(): 79 | self._futures[ori].set_result(result) 80 | 81 | return True 82 | 83 | def add(self, bdata): 84 | 85 | 86 | if bdata.ori == 0 or bdata.ori.ori == 0 : #or bdata.is_BotControl(): 87 | return 88 | #self.buffer.append(bdata) 89 | ori = bdata.ori 90 | data = bdata.data 91 | 92 | if ori not in self._datatrack: 93 | self._datatrack[ori] = {} 94 | self._datatrack[ori][bdata] = DATA_ADD 95 | 96 | 97 | 98 | def remove(self, bdata): 99 | 100 | if bdata.ori == 0 or bdata.is_BotControl() or bdata.ori not in self._datatrack: 101 | return 102 | 103 | # logging.DEBUG("remove %s",bdata) 104 | ori = bdata.ori 105 | data = bdata.data 106 | if self._datatrack[ori][bdata] != DATA_COMPLETE: 107 | self._datatrack[ori][bdata] = DATA_REMOVE 108 | self._check_aweak(ori) 109 | 110 | def get_status(self, ori, bdata): 111 | return self._datatrack[ori][bdata] 112 | 113 | def set_ack(self, bdata): 114 | if bdata.ori == 0 : #or bdata.is_BotControl(): 115 | return 116 | 117 | ori = bdata.ori 118 | data = bdata.data 119 | try: 120 | self._datatrack[ori][bdata] = DATA_COMPLETE 121 | except: 122 | raise 123 | self._check_aweak(ori) 124 | 125 | 126 | def 
get_future(self,ori): 127 | 128 | return self._futures[ori] 129 | 130 | 131 | def create_future(self,ori,callback): 132 | 133 | if ori == 0: 134 | raise Exception("can't wait for 0 input") 135 | return 136 | future = get_loop().create_future() 137 | future.add_done_callback(callback) 138 | self._futures[ori] = future 139 | return future 140 | 141 | async def wait_ori(self, ori): 142 | if ori == 0 or ori in self._futures: 143 | raise Exception("can't wait for 0 input") 144 | return 145 | future = get_loop().create_future() 146 | self._futures[ori] = future 147 | 148 | return await future 149 | # return future.get_result() 150 | 151 | def drop_ori(self, ori): 152 | 153 | if ori in self._datatrack: 154 | del self._datatrack[ori] 155 | del self._futures[ori] 156 | 157 | 158 | 159 | 160 | class Bdata(object): 161 | 162 | 163 | qid=0 164 | @classmethod 165 | def get_uid(cls): 166 | cls.qid+=1 167 | return cls.qid 168 | 169 | 170 | __slots__ = ['_ori', '_data','uid','count'] 171 | 172 | def __init__(self, data, ori=None): 173 | super().__init__() 174 | if isinstance(data, Bdata) or (ori!=0 and not isinstance(ori,Bdata)) : 175 | raise Exception('not right data ' + str(data)) 176 | self.uid=self.get_uid() 177 | self._data = data 178 | #self._meta = None 179 | self._ori = ori 180 | self.count=0 181 | # self._databoard = Databoard() 182 | # 183 | # self.incr() 184 | # self._databoard.add(self) 185 | 186 | def __repr__(self): 187 | if type(self.ori) == int: 188 | return "uid:{},count:{},ori:int({}):data:".format(self.uid, self.count, self.ori, self._data) 189 | else: 190 | return "uid:{},count:{},ori:{}:data:".format(self.uid,self.count,self.ori.uid,self._data) 191 | 192 | def __hash__(self): 193 | 194 | return self.uid 195 | 196 | def __eq__(self, other): 197 | if not hasattr(other,'uid'): 198 | return False 199 | 200 | return self.uid == other.uid 201 | 202 | def __ne__(self,other): 203 | if not hasattr(other, 'uid'): 204 | return True 205 | return not(self.uid == other.uid) 
206 | 207 | def incr(self,n=1): 208 | pass 209 | 210 | def decr(self): 211 | pass 212 | def destroy(self): 213 | pass 214 | 215 | # if self.decr() == 0: 216 | # try: 217 | # self._databoard.remove(self) 218 | # except: 219 | # raise 220 | 221 | 222 | @property 223 | def ori(self): 224 | return self._ori 225 | 226 | @property 227 | def data(self): 228 | return self._data 229 | 230 | # def is_BotControl(self): 231 | # 232 | # return isinstance(self._data, BotControl) 233 | 234 | # @classmethod 235 | # def make_Retire(cls): 236 | # return Bdata(Retire(),ZERO_DATA) 237 | 238 | @classmethod 239 | def make_Bdata_zori(cls,data): 240 | return Bdata(data, ZERO_DATA) 241 | 242 | ZERO_DATA=Bdata(0,ori=0) -------------------------------------------------------------------------------- /botflow/bot.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import logging 3 | from .functionbase import Function 4 | from . import function 5 | from .bdata import Bdata 6 | from .config import config 7 | import typing,types 8 | from .botbase import BotBase,BotManager,BotInfo,filter_out 9 | from .base import BotExit,flatten,get_loop 10 | 11 | logger = logging.getLogger(__name__) 12 | 13 | class CallableBot(BotBase): 14 | 15 | def __init__(self,input_q,output_q,func): 16 | super().__init__() 17 | self.input_q=input_q 18 | self.output_q=output_q 19 | self.func=func 20 | self.raw_bdata=False 21 | self.type_hint=None 22 | 23 | if isinstance(self.func,types.FunctionType): 24 | d=typing.get_type_hints(self.func) 25 | else: 26 | d=typing.get_type_hints(self.func.__call__) 27 | if len(d) == 1: 28 | for k,v in d.items(): 29 | self.type_hint=v 30 | elif len(d) >1: 31 | raise Exception("{} more one param") 32 | 33 | 34 | 35 | async def pre_hook(self): 36 | 37 | if isinstance(self.func, Function): 38 | await self.func.node_init() 39 | 40 | self.raw_bdata = self.func.raw_bdata 41 | 42 | else: 43 | self.raw_bdata = False 44 | 45 | async def 
post_hook(self): 46 | if isinstance(self.func, Function): 47 | await self.func.node_close() 48 | 49 | async def sync_to_async(self, f, data): 50 | r=f(data) 51 | if isinstance(r,types.CoroutineType): 52 | r=await r 53 | return r 54 | 55 | async def call_wrap_r(self,func, bdata): 56 | self.consumed_count += 1 57 | logger.debug('call_wrap' + str(type(func)) + str(func)) 58 | 59 | if bdata.data is None: 60 | return None 61 | result = None 62 | 63 | if not isinstance(bdata, Bdata): 64 | raise Exception('bad data {}'.format(bdata)) 65 | 66 | try: 67 | 68 | if isinstance(func, Function) and func.raw_bdata: 69 | param = bdata 70 | else: 71 | param = bdata.data 72 | 73 | if hasattr(func, 'boost_type'): 74 | loop = get_loop() 75 | result = await loop.run_in_executor(None, func, param) 76 | 77 | 78 | else: 79 | r_or_c = func(param) 80 | if isinstance(r_or_c, types.CoroutineType): 81 | r_or_c = await r_or_c 82 | 83 | if filter_out(r_or_c): 84 | result = None 85 | else: 86 | result = r_or_c 87 | except Exception as e: 88 | logger.exception("Exception when call {} with data {}".format(func,bdata.data)) 89 | if config.exception_policy == config.Exception_raise: 90 | raise e 91 | elif config.exception_policy == config.Exception_ignore: 92 | result = None 93 | elif config.exception_policy == config.Exception_pipein: 94 | result = e 95 | elif config.exception_policy == config.Exception_retry: 96 | raise e # TODO 97 | else: 98 | raise Exception('undefined exception policy') 99 | 100 | return result 101 | 102 | async def merge_list(self,func,bdata): 103 | tasks=[] 104 | for d in flatten(bdata.data): #TODO to deal with too large list and generator!!! 
105 | if self.type_hint is None or isinstance(d,self.type_hint): 106 | task=asyncio.ensure_future(self.call_wrap_r(func, Bdata(d,bdata.ori))) 107 | tasks.append(task) 108 | 109 | #will keep order 110 | r=await asyncio.gather(*tasks) 111 | 112 | return r 113 | 114 | async def append_q(self,call_wrap_r,func,bdata,q): 115 | r=await call_wrap_r(func,bdata) 116 | logger.debug("exe {} get data {}".format(func,r)) 117 | all_none = False 118 | if isinstance(r,list): 119 | all_none = True 120 | for i in r: 121 | if not i is None: 122 | all_none=False 123 | if all_none == False: 124 | self.produced_count += 1 125 | if len(r) ==1: 126 | await q.put(Bdata(r[0], bdata.ori)) 127 | else: 128 | await q.put(Bdata(r, bdata.ori)) 129 | 130 | elif isinstance(r,typing.Generator): 131 | for i in r: 132 | self.produced_count+=1 133 | await q.put(Bdata(i, bdata.ori)) 134 | 135 | else: 136 | #None only can ignore when Filter Node. 137 | 138 | if r is not None or (bdata.ori.data!=0 and not isinstance(func,function.Filter)): 139 | self.produced_count += 1 140 | await q.put(Bdata(r,bdata.ori)) 141 | 142 | 143 | 144 | def create_coro(self,bdata): 145 | 146 | if isinstance(bdata.data,(list,types.GeneratorType)) \ 147 | and not(self.raw_bdata)\ 148 | and ( self.type_hint is not list): 149 | #and not isinstance(self.func,Node): 150 | 151 | 152 | if self.type_hint is not None: 153 | if isinstance(bdata.data, self.type_hint): #the func request a list 154 | coro = self.append_q(self.merge_list,self.func, bdata, self.output_q) 155 | else: 156 | coro = self.output_q.put(bdata) 157 | 158 | else: 159 | coro = self.append_q(self.merge_list,self.func, bdata, self.output_q) 160 | 161 | return coro 162 | elif config.exception_policy != config.Exception_pipein and isinstance(bdata.data,Exception): 163 | return self.output_q.put(bdata) 164 | 165 | 166 | else: 167 | if self.type_hint is not None : 168 | if isinstance(bdata.data,self.type_hint): 169 | coro = self.append_q(self.call_wrap_r,self.func, bdata, 
self.output_q) 170 | else: 171 | coro = self.output_q.put(bdata) 172 | 173 | else: 174 | coro = self.append_q(self.call_wrap_r,self.func, bdata, self.output_q) 175 | 176 | return coro 177 | 178 | def make_botinfo(self): 179 | 180 | 181 | bi = BotInfo() 182 | bi.iq = [self.input_q] 183 | bi.oq = [self.output_q] 184 | bi.func = self.func 185 | bi.main_coro = self.main_loop() 186 | 187 | BotManager().add_bot(bi) 188 | self.bi=bi 189 | return bi 190 | 191 | class RouteMixin(object): 192 | pass 193 | 194 | class RouteInBot(BotBase): 195 | def __init__(self,input_q,func): 196 | super().__init__() 197 | self.input_q=input_q 198 | self.func=func 199 | 200 | def create_coro(self,data): 201 | 202 | coro = self.func.route_in(data) 203 | return coro 204 | 205 | def make_botinfo(self): 206 | 207 | bi = BotInfo() 208 | bi.iq = self.func.routein_in_q() 209 | bi.oq = self.func.routein_out_q() 210 | bi.func = self.func 211 | bi.main_coro = self.main_loop() 212 | 213 | BotManager().add_bot(bi) 214 | self.bi=bi 215 | return bi 216 | 217 | 218 | 219 | 220 | class RouteOutBot(BotBase): 221 | def __init__(self, input_q, func): 222 | super().__init__() 223 | self.output_q = input_q 224 | self.func = func 225 | 226 | 227 | def make_botinfo(self): 228 | 229 | bi = BotInfo() 230 | bi.iq = self.func.routeout_in_q() 231 | bi.oq = self.func.routeout_out_q() 232 | bi.func = self.func 233 | bi.main_coro = self.main_loop() 234 | 235 | BotManager().add_bot(bi) 236 | self.bi=bi 237 | return bi 238 | 239 | 240 | async def get_data_list(self): 241 | r=await self.func.route_out() 242 | return [r] 243 | 244 | def create_coro(self, data): 245 | 246 | return self.output_q.put(data) 247 | 248 | 249 | 250 | 251 | 252 | 253 | class TimerBot(BotBase): 254 | 255 | def __init__(self,iq,oq,timer_route): 256 | super().__init__() 257 | self.count=0 258 | self.timer_route=timer_route 259 | self.output_q=oq 260 | self.input_q=None 261 | 262 | config.check_stoping = False 263 | 264 | 265 | 266 | def 
make_botinfo(self): 267 | 268 | 269 | bi = BotInfo() 270 | 271 | bi.iq = [] 272 | 273 | bi.oq = [self.output_q] 274 | bi.func = self.timer_route 275 | bi.main_coro = self.main_loop() 276 | self.bi = bi 277 | BotManager().add_bot(bi) 278 | 279 | return bi 280 | 281 | def check_stop(self): 282 | 283 | if self.timer_route.max_time and self.timer_route.max_time < self.count: 284 | self.bi.stoped=True 285 | return True 286 | # if self.timer_route.until is not None and self.timer_route.until(): 287 | # self.bi.stoped = True 288 | # return True 289 | return False 290 | 291 | 292 | async def main_logic(self): 293 | 294 | if self.check_stop(): 295 | config.check_stoping=True 296 | raise BotExit() 297 | 298 | 299 | 300 | self.count += 1 301 | 302 | 303 | 304 | await self.output_q.put(Bdata.make_Bdata_zori(self.count)) 305 | 306 | 307 | 308 | await asyncio.sleep(self.timer_route.delay) 309 | 310 | 311 | 312 | 313 | 314 | class LoopBot(BotBase): 315 | def __init__(self, input_q, output_q, it): 316 | 317 | super().__init__() 318 | self.input_q=input_q 319 | self.output_q = output_q 320 | self.it = it 321 | config.check_stoping=False 322 | 323 | def make_botinfo(self): 324 | 325 | 326 | bi = BotInfo() 327 | 328 | bi.iq = [self.input_q] 329 | 330 | bi.oq = [self.output_q] 331 | bi.func = self 332 | bi.main_coro = self.main_loop() 333 | self.bi = bi 334 | BotManager().add_bot(bi) 335 | 336 | return bi 337 | async def get_data_list(self): 338 | r=await self.input_q.get() 339 | return r 340 | 341 | async def main_logic(self): 342 | data_list = await self.get_data_list() 343 | config.check_stoping = False 344 | for v in self.it: 345 | await self.output_q.put(Bdata.make_Bdata_zori(v)) 346 | 347 | config.check_stoping = True 348 | 349 | 350 | 351 | -------------------------------------------------------------------------------- /botflow/botbase.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from .config import config 3 | 
class BotPerf(object):
    """Per-bot performance counters: item count plus avg/max/min call times."""

    __slots__ = ['processed_number', 'func_avr_time', 'func_max_time', 'func_min_time']

    def __init__(self):
        self.processed_number = 0
        # No timing data exists until the first call is measured.
        for attr in ('func_avr_time', 'func_max_time', 'func_min_time'):
            setattr(self, attr, None)
if p.stoped == False: 82 | return False 83 | b = bi 84 | logger.info('ready_to_stop botid %s' % (b)) 85 | return True 86 | 87 | def new_bot_id(self): 88 | self.bot_id += 1 89 | return self.bot_id 90 | def remove_by_pipe(self,pipe): 91 | 92 | self._bots = [ b for b in self._bots if b.pipeline != pipe ] 93 | 94 | 95 | 96 | 97 | def add_pipes(self, pipe): 98 | self._pipes.add(pipe) 99 | 100 | def add_bot(self, bi): 101 | if bi.id == 0: 102 | bi.id = self.new_bot_id() 103 | self._bots.append(bi) 104 | 105 | def get_bots_bypipe(self, pipe): 106 | result = [] 107 | for b in self._bots: 108 | if b.pipeline == pipe: 109 | result.append(b) 110 | 111 | return result 112 | 113 | def make_bot_flowgraph(self, pipe): 114 | for bot in self.get_bots_bypipe(pipe): 115 | 116 | count = 0 117 | for bot_o in self.get_bots_bypipe(pipe): 118 | 119 | for q in bot_o.oq: 120 | if list_included(bot.iq, q): 121 | bot.parents.append(bot_o) 122 | 123 | def bots_size(self): 124 | return len(self._bots) 125 | 126 | def get_bots(self): 127 | return self._bots 128 | 129 | def get_reader_id_by_q(self, q): 130 | ids = [] 131 | for b in self._bots: 132 | if list_included(q, b.iq): 133 | ids.append(b.id) 134 | 135 | return ids 136 | 137 | def get_botinfo_by_id(self, id): 138 | for b in self._bots: 139 | if b.id == id: 140 | return b 141 | return None 142 | 143 | def get_all_q(self): 144 | qs = set() 145 | for b in self._bots: 146 | for q in b.iq + b.oq: 147 | if not isinstance(q, SinkQueue): 148 | qs.add(q) 149 | 150 | return qs 151 | 152 | def get_botinfo_current_task(self) -> BotInfo: 153 | task = asyncio.Task.current_task() 154 | for b in self._bots: 155 | if b.main_task is task or task in b.sub_task: 156 | return b 157 | 158 | # def make_bot_raw(self, iq, oq, f, fu): 159 | # 160 | # bi = BotInfo() 161 | # bi.iq = iq 162 | # if not isinstance(oq, list): 163 | # oq = [oq] 164 | # bi.oq = oq 165 | # bi.futr = fu 166 | # bi.func = f 167 | 168 | # self._bots.append(bi) 169 | 170 | def debug_print(self): 
def filter_out(data):
    """Return True when *data* should be dropped from the pipeline.

    Only ``None`` and an empty list mean "nothing to emit"; other falsy
    values such as ``0`` or ``""`` still flow through.
    """
    return data is None or data == []
def raw_value_wrap(message):
    """Wrap a constant *message* as a node function.

    The returned generator function ignores its input; it yields the
    elements of *message* one by one when it is an iterable other than
    str/dict/tuple, and otherwise yields *message* itself exactly once.
    """
    expand = isinstance(message, typing.Iterable) and not isinstance(message, (str, dict, tuple))

    def _raw_value_wrap(_ignored):
        if expand:
            yield from message
        else:
            yield message

    return _raw_value_wrap
    async def main_logic(self):
        """Default per-iteration work: fan a batch of items out as sub-tasks.

        Reads a batch from the input queue, spawns one coroutine per item via
        ``create_coro``, waits for all of them, then removes the bookkeeping
        entries so the stop-checker sees no pending work.
        """

        data_list = await self.get_data_list()

        # Mark busy only after we actually received data.
        self.bi.idle = False

        tasks = []
        for data in data_list:
            # if not data.is_BotControl():


            coro = self.create_coro(data)
            # Track both the coroutine and its task so the shutdown logic
            # can tell whether this bot still has in-flight work.
            self.bi.sub_coro.add(coro)
            task = asyncio.ensure_future(coro)
            self.bi.sub_task.add(task)
            tasks.append(task)

        if len(tasks) != 0:
            await asyncio.gather(*tasks)

        # All sub-tasks finished: drop them from the in-flight sets.
        for t in tasks:
            self.bi.sub_coro.remove(t._coro)
            self.bi.sub_task.remove(t)
logging 4 | from .botbase import BotManager 5 | from .queue import SinkQueue, QueueManager,DataQueue,ConditionalQueue 6 | from .base import BotExit,get_loop,_BOT_LOOP 7 | from aiohttp.web import AppRunner,TCPSite 8 | from .pipe import Pipe 9 | from .botframe import BotFrame 10 | from .bdata import Bdata 11 | 12 | logger = logging.getLogger(__name__) 13 | class MyEventLoopPolicy(asyncio.DefaultEventLoopPolicy): 14 | 15 | def get_event_loop(self): 16 | # Do something with loop ... 17 | return _BOT_LOOP 18 | 19 | class BotFlow(object): 20 | 21 | 22 | 23 | started=False 24 | @classmethod 25 | def render(cls, filename): 26 | from graphviz import Digraph 27 | f = Digraph(comment=__file__, format='png') 28 | f.attr('node', shape='circle') 29 | cls.start() 30 | cls.started=True 31 | bots = BotManager().get_bots() 32 | for idx, b in enumerate(bots): 33 | name = str(b.func).replace('>', '').replace('<', '') 34 | name = name.split('.')[-1] 35 | name = name.split('at')[0] 36 | name = "(%d)" % (idx) + name 37 | f.node(str(id(b)), name) 38 | bid = str(id(b)) 39 | for p in b.parents: 40 | pid = str(id(p)) 41 | f.edge(pid, bid) 42 | 43 | f.render(filename, view=True) 44 | 45 | 46 | 47 | @classmethod 48 | async def run_web_app(cls,app,host,port): 49 | runner = AppRunner(app) 50 | await runner.setup() 51 | site = TCPSite(runner, host,port) 52 | await site.start() 53 | 54 | 55 | 56 | 57 | @classmethod 58 | def start(cls): 59 | if cls.started: 60 | return 61 | pipes = BotManager().get_pipes() 62 | for p in pipes: 63 | start_q=DataQueue() 64 | end_q=ConditionalQueue() 65 | p._make(start_q,end_q) 66 | p._start() 67 | 68 | 69 | 70 | 71 | @classmethod 72 | def run_app(cls,app,host='0.0.0.0', port=8080): 73 | 74 | asyncio.set_event_loop_policy(MyEventLoopPolicy()) 75 | print(f"BotFlow start web server http://{host}:{port}") 76 | config.never_stop = True 77 | 78 | cls.start() 79 | 80 | asyncio.ensure_future(cls.run_web_app(app,host,port)) 81 | 82 | 83 | 84 | get_loop().run_forever() 85 | 86 
| 87 | 88 | @classmethod 89 | def run(cls,*pipes,silent=False,render=None): 90 | 91 | 92 | bm=BotManager() 93 | loop=get_loop() 94 | if render is not None: 95 | cls.render(render) 96 | if not silent : 97 | QueueManager().dev_mode() 98 | 99 | if config.replay_mode: 100 | try: 101 | Pipe.restore_for_replay() 102 | except: 103 | raise 104 | 105 | try: 106 | 107 | tasks=[] 108 | if len(pipes) == 0: 109 | pipes=bm.get_pipes() 110 | for p in pipes: 111 | start_q=DataQueue() 112 | end_q =SinkQueue() 113 | p._make(start_q,end_q) 114 | p._start() 115 | bdata=Bdata.make_Bdata_zori(0) 116 | task=get_loop().create_task(p._true_run(bdata)) 117 | tasks.append(task) 118 | 119 | f = asyncio.gather(*tasks,loop=loop) 120 | 121 | get_loop().run_until_complete(f) 122 | 123 | 124 | except Exception as e: 125 | if config.replay_mode: 126 | Pipe.save_for_replay() 127 | raise e 128 | 129 | else: 130 | raise e 131 | finally: 132 | BotFlow.reset() 133 | 134 | @classmethod 135 | def reset(cls): 136 | 137 | BotManager().rest() 138 | 139 | @classmethod 140 | def debug_print(cls): 141 | BotManager().debug_print() 142 | QueueManager().debug_print() 143 | # Databoard().debug_print() 144 | 145 | @classmethod 146 | def enable_debug(cls): 147 | config.debug = True 148 | -------------------------------------------------------------------------------- /botflow/botframe.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from . 
import route 3 | import botflow.functionbase as function 4 | import typing 5 | import types 6 | from .config import config 7 | import logging 8 | from botflow.bdata import Bdata 9 | from .botbase import BotManager, BotInfo,raw_value_wrap 10 | from .bot import CallableBot,RouteOutBot,RouteInBot,TimerBot,LoopBot 11 | from .queue import SinkQueue,QueueManager,DataQueue 12 | logging.basicConfig(format='%(asctime)s %(name)-12s %(levelname)s:%(message)s', level=logging.INFO) 13 | from .base import BotExit,get_loop 14 | 15 | 16 | 17 | 18 | def cmp_q_list(aql,bql): 19 | a=set() 20 | b=set() 21 | 22 | for aq in aql: 23 | if not isinstance(aq, SinkQueue): 24 | a.add(aq) 25 | 26 | for bq in bql: 27 | if not isinstance(bq, SinkQueue): 28 | b.add(bq) 29 | 30 | if len(a) !=len(b): 31 | return False 32 | 33 | if a == b: 34 | return True 35 | 36 | return False 37 | 38 | 39 | from concurrent import futures 40 | executor = futures.ThreadPoolExecutor(max_workers=10) 41 | get_loop().set_default_executor(executor) 42 | 43 | class BotFrame(object): 44 | 45 | 46 | 47 | @classmethod 48 | 49 | 50 | @classmethod 51 | def ready_to_stop(cls, bi): 52 | if bi.iq is not None: 53 | if not isinstance(bi.iq, list): 54 | raise Exception('') 55 | for q in bi.iq: 56 | if not q.empty(): 57 | return False 58 | 59 | for p in bi.parents: 60 | if p.stoped == False: 61 | return False 62 | b = bi 63 | logging.info('ready_to_stop botid %s' % (b)) 64 | return True 65 | 66 | @classmethod 67 | def make_bot_raw(cls, iq, oq, func,coro): 68 | task = asyncio.ensure_future(coro) 69 | bi = BotInfo() 70 | bi.iq = iq 71 | if not isinstance(oq, list): 72 | oq = [oq] 73 | bi.oq = oq 74 | bi.main_coro = coro 75 | bi.main_task= task 76 | bi.func = func 77 | 78 | BotManager().add_bot(bi) 79 | 80 | @classmethod 81 | def make_pipe(cls,pipes): 82 | 83 | for p in pipes: 84 | sq=DataQueue() 85 | eq=SinkQueue() 86 | p.make_route_bot(sq,eq) 87 | 88 | @classmethod 89 | def make_bot(cls, i, o, f): 90 | 91 | 92 | 93 | if not 
    def __init__(self):
        """Create the global configuration with its default settings."""
        # When True, exceptions inside nodes are suppressed instead of surfaced.
        self.suppress_exception = False
        # One of the Exception_* policy constants declared on the class.
        self.exception_policy=self.Exception_default
        self.joined_network=True
        # stream (flat) vs hierarchical execution of the flow.
        self.execute_mode=self.stream
        self.replay_mode=False
        self.graph_optimize=True
        self.coroutine_batch_size=4 #for http loader the batch size don't effect time effort too much
        self.debug=False
        # When True, the flow never shuts itself down (server mode).
        self.never_stop=False
        self.main_lock=asyncio.Lock()
        # HACK: pre-mark the lock as held by poking its private flag.
        # NOTE(review): relies on asyncio.Lock internals — confirm on upgrade.
        self.main_lock._locked=True
        # Whether the stop-checker is currently allowed to end the flow.
        self.check_stoping=True
        self.default_queue_max_size=1000
        self.backpressure_rate_limit=0 #per sec
class AioFile(Function):
    """Flow node that appends incoming items to a text file.

    Blocking file operations are pushed onto the default executor so the
    event loop is never blocked.
    """

    async def open(self,filename,mode='w'):
        """Open *filename* in a worker thread and return the file object."""

        cb = partial(open, filename, mode=mode, encoding="utf-8")
        fd=await self.loop.run_in_executor(None, cb)
        return fd

    def __init__(self,fileame,mode='w'):
        # NOTE(review): parameter name `fileame` is a typo for `filename`;
        # renaming it would break keyword callers, so it is left as-is.
        self.filename=fileame
        self.mode=mode
        self.fd=None
        self.bm=BotManager()
        self.loop=self.bm.loop

        super().__init__()

    async def init(self):
        # Called once when the node joins the flow: open the target file.
        self.fd = await self.open(self.filename, self.mode)
    async def close(self):
        # Close in a worker thread (file close can block on flush).
        await self.loop.run_in_executor(None, self.fd.close)

    async def __call__(self, text):
        """Write *text* (joined if a list/tuple, repr'd otherwise) plus newline."""
        if isinstance(text,(list,tuple)):
            text=",".join(text)
        elif not isinstance(text,(str,bytes)):
            text=text.__repr__()
        # NOTE(review): a bytes payload reaches this str concatenation and
        # would raise TypeError — confirm whether bytes input is expected.
        text+="\n"
        await self.loop.run_in_executor(None, self.fd.write,text)
        return text
class HttpRequest(object):
    """Mutable description of an HTTP request used by HttpLoader.

    Built either from explicit url/headers/payload/method arguments or by
    parsing a raw request-head string passed as ``request_headers``.
    """

    def __init__(self, url=None, headers=None, payload=None, method='get', request_headers=None):
        if request_headers:
            # Fills in self.method / self.headers / self.url from raw text.
            self.parse_headers_string(request_headers)
        else:
            self.method = method  # GET or POST
            self.headers = headers if headers else default_headers
            self.url = url

        # Fix: these were previously only initialised on the non-parse
        # branch, leaving parsed requests without query/payload/cookies.
        self.query = {}
        self.payload = payload
        self.cookies = None

    def parse_headers_string(self, s):
        """Parse a raw HTTP request head (request line + header lines)."""
        lines = []
        for line in s.split("\n"):
            if len(line.strip()) == 0:
                continue
            lines.append(line.strip())

        method, url, http_version = lines[0].split(" ")
        headers = {}
        for line in lines[1:]:
            k, v = line.split(":", 1)
            # Fix: strip the value — "Host: x" previously kept the leading
            # space, producing an invalid "http:// x" URL below.
            headers[k] = v.strip()

        self.method = method
        self.headers = headers
        self.url = "http://" + headers['Host'] + url

    def __setitem__(self, key, value):
        setattr(self, key, value)

    def __getitem__(self, key):
        return getattr(self, key)

    def __repr__(self):
        return "{}(url:{},method:{},payload:{})".format(self.__class__, self.url, self.method, self.payload)
    def __init__(self,body,encoding):
        """Hold a raw HTTP body and lazily expose text/json/soup views.

        :param body: raw response bytes.
        :param encoding: charset used for decoding when none is supplied later.
        """
        self.url=''
        self._body=body
        # NOTE(review): the three placeholders below are assigned the imported
        # `Function` class, apparently as an "unset" sentinel; `None` looks
        # intended — confirm before relying on these attributes.
        self._headers=Function
        self._encoding=encoding
        self._cookies = Function
        self._status=Function
        # Lazily-computed caches for the json/soup/text properties below.
        self._json=None
        self._soup=None
        self._text=None

    @property
    def text(self,encoding=None,errors='strict'):
        # Decode the body once and cache it.
        # NOTE(review): property getters are invoked without arguments, so
        # `encoding`/`errors` always keep their defaults here.

        if self._text is not None:
            return self._text
        if encoding is None:
            encoding = self._encoding

        self._text=self._body.decode(encoding, errors=errors)
        return self._text

    @property
    def json(self,encoding=None):
        # Parse the body as JSON once and cache the result.
        if self._json is not None:
            return self._json


        if encoding is None:
            encoding = self._encoding

        self._json=json.loads(self._body.decode(encoding))
        return self._json
    def search(self,text):
        # NOTE(review): the result of find_all() is discarded and the *text*
        # argument is unused — this method looks unfinished.
        self.soup.find_all()
    def get_all_links(self):
        # Yield every <a> tag that carries an href attribute.
        for i in self.soup.find_all('a', href=True):
            yield i

    @property
    def soup(self):
        # Build (and cache) a BeautifulSoup view of the decoded text.
        if self._soup is not None:
            return self._soup
        self._soup = BeautifulSoup(self.text, "lxml")
        setattr(self._soup,"get_all_links",self.get_all_links)
        return self._soup

    def __repr__(self):
        return '%s(%s)'%(self.__class__,self.text)
    def __init__(self,route_path="/",port=8080,bind_address='127.0.0.1',timeout=10):
        """HTTP entry-point route: turns incoming requests into pipe data.

        :param route_path: only requests whose path matches are accepted.
        :param port: TCP port to listen on.
        :param bind_address: interface to bind the server to.
        :param timeout: seconds to wait for the pipe to answer a request.
        """


        super().__init__()
        self.route_path=route_path
        self.port=port
        self.bind_address=bind_address
        self.timeout=timeout
        self.bm=BotManager()
        # Maps an in-flight request's Bdata to the Future that resolves
        # when the pipe routes an answer back (see route_in).
        self.waiters={}

        # A server must keep the flow running even when queues drain.
        config.never_stop=True
    async def put_input_queue(self,request:web.Request):
        """aiohttp handler: convert *request* to pipe data and await the reply.

        Rejects requests whose path differs from ``route_path``.  Otherwise
        the request is wrapped as a Bdata, pushed into the flow, and the
        handler blocks (up to ``self.timeout`` seconds) on a Future that
        ``route_in`` resolves with the pipe's result.
        """
        breq=HttpRequest()
        breq.headers=request.headers
        breq.cookies=request.cookies
        breq.url=request.url
        breq.path=request.path
        if breq.path != self.route_path:
            return web.Response(text="error path")
        breq.method=request.method
        breq.payload=await request.text()
        breq.query=request.query

        # The request itself acts as origin marker so the answer can be
        # matched back to this handler.
        ori=Bdata(breq,0)
        bdata=Bdata(breq,ori)

        #send result to q
        await self.output_q.put(bdata)
        fut=self._loop.create_future()
        self.waiters[bdata]=fut
        r=await asyncio.wait_for(fut,self.timeout)


        try:

            return web.json_response(r)
        except:
            raise
class Flat(Function):
    """Flatten one level of nesting: lists/generators are emitted item by item.

    :param level: requested flattening depth.
    """

    def __init__(self, level=0):
        super().__init__()
        # Fix: the parameter was previously ignored (self.level was
        # hard-coded to 0); store the caller's value instead.
        self.level = level
        # Receive the raw Bdata wrapper rather than the unwrapped payload.
        self.raw_bdata = True

    def __call__(self, bdata):
        # Lists and generators are expanded one element at a time; any other
        # payload passes through unchanged.
        # NOTE(review): `level` is stored but not yet consumed here — only
        # one level of flattening is performed regardless of its value.
        if isinstance(bdata.data, (list, typing.Generator)):
            for i in bdata.data:
                yield i
        else:
            yield bdata.data
# 56 | # if len(self.buffer[bdata.ori]) == self.n_stream: 57 | # return self.buffer[bdata.ori] 58 | 59 | 60 | Loop=raw_value_wrap 61 | 62 | class Filter(Function): 63 | 64 | 65 | def __init__(self, filter_func): 66 | super().__init__() 67 | # if not isinstance(filter_types,list) and filter_types is not None: 68 | # filter_types=[filter_types] 69 | 70 | # self.filter_types=filter_types 71 | self.filter_func=filter_func 72 | self.raw_bdata=True 73 | def __call__(self, bdata): 74 | 75 | data=bdata.data 76 | if isinstance(self.filter_func,str): 77 | if self.filter_func in str(data): 78 | return data 79 | else: 80 | return None 81 | 82 | 83 | if self.filter_func(data): 84 | return data 85 | 86 | # matched=False 87 | # if self.filter_types: 88 | # for t in self.filter_types: 89 | # if isinstance(data,t): 90 | # matched=True 91 | # break 92 | # else: 93 | # matched=True 94 | # 95 | # if matched and (self.filter_func == None or self.filter_func(data)): 96 | # return data 97 | 98 | 99 | class Delay(Function): 100 | def __init__(self,delay_time=1): 101 | super().__init__() 102 | self.delay_time=delay_time 103 | self.lock=asyncio.Lock(loop=get_loop()) 104 | 105 | async def __call__(self,data): 106 | await self.lock.acquire() 107 | await asyncio.sleep(self.delay_time) 108 | self.lock.release() 109 | return data 110 | 111 | class SpeedLimit(Function): 112 | def __init__(self,speed): 113 | super().__init__() 114 | self.processed_count=0 115 | self.start_time=datetime.datetime.now() 116 | self.speed_limit=speed 117 | self.lock=asyncio.Lock() 118 | 119 | async def __call__(self,data): 120 | self.processed_count += 1 121 | if self.processed_count > self.speed_limit: 122 | await self.lock.acquire() 123 | end = datetime.datetime.now() 124 | s=(end-self.start_time).total_seconds() 125 | speed_now=self.processed_count/s 126 | if speed_now>(self.speed_limit*1.1) : 127 | sleep_time=self.processed_count/self.speed_limit-s 128 | self.start_time = datetime.datetime.now() 129 | await 
class dotdict(dict):
    """Dictionary whose keys can also be read/written/deleted as attributes.

    Reading a missing key via attribute access returns ``None``
    (``dict.get`` semantics); deleting a missing key raises ``KeyError``.
    """

    def __getattr__(self, name):
        return self.get(name)

    def __setattr__(self, name, value):
        self[name] = value

    def __delattr__(self, name):
        del self[name]
class Runnable(object):
    """Mixin giving a pipe the ability to run, poll for completion and stop.

    Expects the concrete class to provide ``self.all_q`` (every queue used
    by this pipe's bots), ``self.start_q`` (list of entry queues),
    ``self.output_q``, ``self.bm`` (a BotManager) and ``self.args``.
    """

    def stop(self, force=False):
        """Mark every bot of this pipe as stopped and wake blocked readers.

        Readers parked on a queue's ``_getters`` futures are released by
        raising BotExit into them.
        """
        bm = BotManager()

        for bot in bm.get_bots_bypipe(self):
            bot.stoped = True

        for q in self.all_q:
            for get in q._getters:
                get.set_exception(BotExit("Bot exit now"))

    async def check_stop(self):
        """Poll every 2 seconds until the pipe has drained, then stop() it.

        The pipe counts as drained when no bot of this pipe has pending
        sub-tasks and every intermediate queue is empty (SinkQueue,
        ConditionalQueue and the final output queue are exempt).
        """
        all_q = self.all_q

        while True:
            # await config.main_lock.acquire()
            stop = True
            if logger.level == logging.DEBUG:
                QueueManager().debug_print()
            # QueueManager().debug_print()
            for bot in self.bm.get_bots_bypipe(self):
                if len(bot.sub_task) != 0:
                    logger.debug("bot id :{} sub task len:{} sopt to close".format(id(bot), len(bot.sub_task)))
                    stop = False
                    break

            for q in all_q:
                # Sink/conditional queues and the pipe output never have to
                # drain for the pipe to be considered finished.
                if isinstance(q, (SinkQueue,ConditionalQueue)) or self.output_q == q:
                    continue
                if q.empty() == False:
                    # print("id:{} size:{}".format(id(q),q.qsize()))
                    logger.debug("id:{} size:{} stop to close".format(id(q), q.qsize()))
                    stop = False
                    break

            # config.check_stoping can veto auto-shutdown (e.g. server mode).
            if stop and config.check_stoping:
                break

            await asyncio.sleep(2)

        # NOTE(review): uses the root logger via logging.info where the
        # module-level ``logger`` was likely intended — confirm.
        logging.info("pipe_{} ready to exit".format(id(self)))
        self.stop()

    def get_start_q(self):
        """Return the first (primary) entry queue of the pipe."""
        return self.start_q[0]

    async def run_async(self, data):
        """Push *data* through the pipe and await the matching result.

        The result is correlated via the Bdata origin object; inside a
        list result, exceptions are replaced by None.
        """
        ori = Bdata.make_Bdata_zori(data)
        await self.get_start_q().put(Bdata(data, ori))
        r = await self.output_q.get_by(ori)
        self.output_q.clean(ori)
        if isinstance(r.data, list):
            for i, v in enumerate(r.data):
                if isinstance(v, Exception):
                    r.data[i] = None
        return r.data

    def _start(self):
        """Schedule every bot's main coroutine on the manager's loop.

        Returns the created tasks.
        """
        bots = BotManager().get_bots_bypipe(self)
        tasks = []
        for b in bots:
            # if not b.stoped:
            if b.main_coro is not None:
                task = self.bm.loop.create_task(b.main_coro)
                b.main_task = task
                tasks.append(task)

        return tasks

    def _make(self,start_q,end_q):
        """Build the bot graph between the given entry and exit queues."""
        self.make_route_bot(start_q, end_q)

    async def _true_run(self, bdata):
        """Inject the seed bdata, then wait for the pipe to drain."""
        await self.get_start_q().put(bdata)
        await self.check_stop()

    def run(self, data=0):
        """Blocking entry point: build, run and tear down the pipe.

        A list/range argument is prepended to ``self.args`` so it becomes
        the pipe's generator node; the seed value is then a dummy 0.
        Returns get_result() (single payload, or a list of payloads).
        """
        start_q = DataQueue()
        end_q = DataQueue(maxsize=0)
        if isinstance(data,(list,range)):

            self.args.insert(0,data)
            bdata = Bdata.make_Bdata_zori(0)
        else:
            bdata = Bdata.make_Bdata_zori(data)
        self._make(start_q,end_q)
        self._start()
        self.bm.loop.run_until_complete(self._true_run(bdata))
        result = self.get_result()
        self.bm.remove_by_pipe(self)

        return result
BotManager().add_pipes(self) 152 | 153 | def function_wrap(self,func_name): 154 | 155 | if func_name in function.__all__: 156 | to_call = getattr(function, func_name) 157 | elif func_name in ex.__all__: 158 | 159 | to_call = getattr(ex, func_name) 160 | 161 | else: 162 | to_call = getattr(route, func_name) 163 | 164 | def _wrap(*args,**kwargs): 165 | 166 | 167 | self.args.append(to_call(*args,**kwargs)) 168 | return self 169 | 170 | 171 | return _wrap 172 | 173 | 174 | def part_wrape(self,f_list): 175 | 176 | def _wrape(data): 177 | r=data 178 | for f in f_list: 179 | 180 | if isinstance(r,(list,typing.Generator)): 181 | _r = [] 182 | for i in r: 183 | __r=f(i) 184 | #if __r is not None: 185 | _r.append(__r) 186 | if len(_r) == 1: 187 | r=_r[0] 188 | else: 189 | r=_r 190 | else: 191 | r=f(r) 192 | 193 | if isinstance(r, ( typing.Generator)): 194 | result=[] 195 | for i in r: 196 | result.append(i) 197 | return result 198 | return r 199 | 200 | return _wrape 201 | def merge_args(self): 202 | _list=[] 203 | part = [] 204 | for f in self.args: 205 | 206 | if not isinstance(f, (typing.Callable,Route)): 207 | f = function.Loop(f) 208 | 209 | if isinstance(f,Route) or\ 210 | isinstance(f , function.Flat) or \ 211 | inspect.iscoroutinefunction(f.__call__): 212 | if len(part) !=0: 213 | _list.append(self.part_wrape(part)) 214 | part = [] 215 | 216 | _list.append(f) 217 | else: 218 | part.append(f) 219 | if len(part)==1: 220 | _list.append(f) 221 | elif len(part)>1: 222 | _list.append(self.part_wrape(part)) 223 | 224 | return _list 225 | 226 | def make_route_bot(self,iq,oq): 227 | self.share=False 228 | self.outer_iq = iq 229 | self.outer_oq = oq 230 | 231 | 232 | 233 | 234 | self.bot_start_index = len(self.bm.get_bots()) 235 | self.start_q = [iq] 236 | q_o=self.start_q[0] 237 | #self.args=self.merge_args() 238 | for idx, func in enumerate(self.args): 239 | q_i = q_o 240 | if idx == len(self.args) - 1: 241 | q_o = oq 242 | 243 | else: 244 | if config.replay_mode: 245 | q_o 
= CachedQueue() 246 | else: 247 | q_o = DataQueue() 248 | 249 | bis = BotFrame.make_bot(q_i, q_o, func) 250 | for b in bis: 251 | b.flow = 'main' 252 | 253 | 254 | self.bot_end_index = len(self.bm.get_bots()) 255 | self.output_q = q_o 256 | bots = self.bm.get_bots() 257 | 258 | self.all_q = set() 259 | for i in range(self.bot_start_index, self.bot_end_index): 260 | bot = bots[i] 261 | self.bots.append(bot) 262 | bot.pipeline = self 263 | for q in bot.iq: 264 | self.all_q.add(q) 265 | for q in bot.oq: 266 | self.all_q.add(q) 267 | 268 | 269 | self.bm.make_bot_flowgraph(self) 270 | 271 | 272 | 273 | 274 | 275 | 276 | 277 | async def aiohttp_json_handle(self,request): 278 | 279 | from aiohttp import web 280 | r = await self.run_async(request) 281 | return web.json_response(r) 282 | 283 | 284 | def sanic_json_handle(self): 285 | from sanic.response import json 286 | async def _wrap(request): 287 | r = await self(request) 288 | return json(r) 289 | 290 | return _wrap 291 | 292 | 293 | 294 | @classmethod 295 | def empty(cls): 296 | bm=BotManager() 297 | bi=bm.get_botinfo_current_task() 298 | for q in bi.pipeline.all_q: 299 | if isinstance(q, SinkQueue): 300 | continue 301 | if q.empty() == False: 302 | print("id:{}".format(id(q))) 303 | return False 304 | 305 | 306 | for bot in bm.get_bots_bypipe(bi.pipeline): 307 | 308 | if bi == bot: 309 | continue 310 | if len(bot.sub_task) !=0: 311 | print(bot.func,bot,len(bot.sub_task)) 312 | return False 313 | 314 | 315 | 316 | 317 | return True 318 | 319 | def save_for_replay(self): 320 | '''it will save cached data for pay back''' 321 | 322 | self.pickle_name = sys.modules['__main__'].__file__ + 'palyback.pk' 323 | #1. 
get output queue of the nearest closed node in main pipe 324 | #2.save the data 325 | max_id=-1 326 | bot=None 327 | for b in BotFrame.bots: 328 | if b.flow=='main' and b.stoped==True: 329 | if b.id > max_id: 330 | bot=b 331 | max_id=b.id 332 | if bot is None: 333 | pass 334 | 335 | obj={} 336 | obj['botid']=max_id 337 | 338 | to_dump=[] 339 | for q in bot.oq: 340 | #iid=get_writor_botid(q) 341 | iid=[max_id] 342 | oid=self.bm.get_reader_id_by_q(q) 343 | to_dump.append((iid,oid,q.cache)) 344 | 345 | obj['data'] =to_dump 346 | 347 | import pickle 348 | with open(self.pickle_name,'wb') as f: 349 | pickle.dump(obj,f) 350 | 351 | 352 | def get_q_by_bot_id_list(self, iid, oid): 353 | q_of_writer=set() 354 | q_of_reader=set() 355 | 356 | for i in iid: 357 | for q in self.bm.get_botinfo_by_id(i).oq: 358 | q_of_writer.add(q) 359 | for i in oid: 360 | for q in self.bm.get_botinfo_by_id(i).iq: 361 | q_of_reader.add(q) 362 | 363 | 364 | r=q_of_writer&q_of_reader 365 | return r.pop() 366 | 367 | 368 | def restore_for_replay(self): 369 | '''''' 370 | 371 | import os.path 372 | if not os.path.isfile(self.pickle_name): 373 | return 374 | 375 | import pickle 376 | with open(self.pickle_name,'rb') as f: 377 | obj=pickle.load(f) 378 | 379 | botid=obj['botid'] 380 | for b in BotFrame.bots: 381 | if b.id<=botid: 382 | b.stoped=True 383 | for data in obj['data']: 384 | (iid,oid,cache)=data 385 | q=self.get_q_by_bot_id_list(iid, oid) 386 | q.load_cache(cache) 387 | 388 | return 389 | 390 | 391 | 392 | 393 | 394 | 395 | 396 | 397 | 398 | 399 | 400 | def finished(self): 401 | bm=BotManager() 402 | for bot in bm.get_bots_bypipe(self): 403 | task = bot.main_task 404 | if not (task.done() or task.cancelled()) and bot.idle == False: 405 | return False 406 | return True 407 | 408 | async def write(self,data): 409 | await self.start_q.put(Bdata.make_Bdata_zori(data)) 410 | 411 | async def read(self): 412 | r =await self.output_q.get() 413 | yield r 414 | 415 | while not self.output_q.empty(): 
416 | r=self.output_q.get_nowait() 417 | yield r 418 | 419 | 420 | def get_result(self): 421 | result=[] 422 | while True: 423 | try: 424 | r = self.output_q.get_nowait() 425 | result.append(r.data) 426 | except: 427 | break 428 | #self.bm.loop.stop() 429 | if len(result)==1: 430 | return result[0] 431 | return result 432 | 433 | def dev_mode(self): 434 | QueueManager().dev_mode() 435 | 436 | 437 | 438 | 439 | 440 | def __repr__(self): 441 | return 'Pip_' + str(id(self)) 442 | 443 | 444 | 445 | -------------------------------------------------------------------------------- /botflow/queue.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from .bdata import Bdata 3 | from .base import Singleton,get_loop 4 | from .config import config 5 | import logging 6 | import datetime 7 | import collections 8 | logger = logging.getLogger(__name__) 9 | 10 | class QueueManager(object,metaclass=Singleton): 11 | 12 | def __init__(self): 13 | self.q_list=[] 14 | self._dev_mode=False 15 | 16 | 17 | 18 | def reset(self): 19 | self.q_list=[] 20 | 21 | def add(self,q): 22 | 23 | logger.debug( 24 | "QM add q_{},max size:{}".format(id(q), q.maxsize)) 25 | self.q_list.append(q) 26 | 27 | def debug_print(self): 28 | 29 | 30 | for q in self.q_list: 31 | if isinstance(q,DataQueue): 32 | logger.info("q_{},max size:{},qsize:{},high water:{},data:{}".format(id(q),q.maxsize,q.qsize(),q.high_water,type(q))) 33 | else: 34 | logger.info("q_{},type:{}".format(id(q),type(q))) 35 | 36 | def dev_mode(self): 37 | self._dev_mode=True 38 | 39 | class DataQueue(asyncio.Queue): 40 | def __init__(self,maxsize=None,loop=None): 41 | 42 | 43 | if maxsize is None: 44 | maxsize=config.default_queue_max_size 45 | 46 | super().__init__(maxsize=maxsize,loop=get_loop()) 47 | self.qm=QueueManager() 48 | self.debug = True 49 | self.high_water = 0 50 | self.qm.add(self) 51 | self.put_count=0 52 | self.get_count=0 53 | 54 | self.start_time=datetime.datetime.now() 55 
| self.speed_limit=config.backpressure_rate_limit 56 | self.lock=asyncio.Lock() 57 | 58 | self.put_callback=None 59 | 60 | 61 | 62 | 63 | async def writable(self): 64 | pass 65 | #TODO 66 | 67 | def set_put_callback(self,f): 68 | self.put_callback=f 69 | 70 | async def put(self, item): 71 | '''with out any limit ,the max put speed 23200 from list a generator''' 72 | if not isinstance(item,Bdata): 73 | e=Exception('not right data'+str(type(item))) 74 | logger.error(e) 75 | raise e 76 | 77 | if self.speed_limit !=0: 78 | self.put_count += 1 79 | 80 | 81 | if self.put_count >self.speed_limit*2: 82 | await self.lock.acquire() 83 | end = datetime.datetime.now() 84 | s = (end - self.start_time).total_seconds() 85 | speed_now = self.put_count / s 86 | logger.debug(f"q_{id(self)} speed now:{speed_now} {s} {self.put_count}") 87 | if speed_now > (self.speed_limit * 1.1): 88 | sleep_time = self.put_count / self.speed_limit - s 89 | logger.debug(f"q_{id(self)} need to sleep{sleep_time} ") 90 | logger.debug(f"start q_{id(self)} {datetime.datetime.now()}") 91 | self.start_time = datetime.datetime.now() 92 | await asyncio.sleep(sleep_time) 93 | logger.debug(f"end q_{id(self)} {datetime.datetime.now()}") 94 | 95 | else: 96 | self.start_time = datetime.datetime.now() 97 | self.put_count = 0 98 | 99 | 100 | self.lock.release() 101 | 102 | 103 | r= await super().put(item) 104 | 105 | if self.debug: 106 | if self.qsize()>self.high_water: 107 | self.high_water=self.qsize() 108 | 109 | # if self.put_callback is not None: 110 | # asyncio.ensure_future(self.put_callback(item)) 111 | return r 112 | 113 | def __repr__(self): 114 | return "{}({})".format(self.__class__,id(self)) 115 | 116 | 117 | def __str__(self): 118 | return "{}({})".format(self.__class__,id(self)) 119 | 120 | async def get(self): 121 | 122 | r=await super().get() 123 | #r.destroy() 124 | return r 125 | 126 | async def get_by(self,ori): 127 | while True: 128 | await self.readable() 129 | item=self._queue[-1] 130 | if 
item.ori == ori: 131 | return self._queue.popleft() 132 | 133 | 134 | async def readable(self): 135 | while self.empty(): 136 | getter = self._loop.create_future() 137 | self._getters.append(getter) 138 | try: 139 | await getter 140 | except: 141 | getter.cancel() # Just in case getter is not done yet. 142 | 143 | try: 144 | self._getters.remove(getter) 145 | except ValueError: 146 | pass 147 | 148 | if not self.empty() and not getter.cancelled(): 149 | # We were woken up by put_nowait(), but can't take 150 | # the call. Wake up the next in line. 151 | self._wakeup_next(self._getters) 152 | raise 153 | 154 | return 155 | 156 | 157 | class ConditionalQueue: 158 | 159 | 160 | def __init__(self, maxsize=0, *, loop=None): 161 | 162 | self.qm = QueueManager() 163 | 164 | self._loop =get_loop() 165 | 166 | self._maxsize = maxsize 167 | 168 | # Futures. 169 | self._inernel_getters = {} #collections.deque() 170 | # Futures. 171 | self._putters = collections.deque() 172 | # self._unfinished_tasks = 0 173 | # self._finished = asyncio.Lock.Event(loop=self._loop) 174 | # self._finished.set() 175 | self._init(maxsize) 176 | 177 | self.debug = True 178 | self.high_water = 0 179 | self.qm.add(self) 180 | self.put_count=0 181 | 182 | # These three are overridable in subclasses. 
183 | @property 184 | def _getters(self): 185 | r=[] 186 | for k,v in self._inernel_getters.items(): 187 | r.extend(v) 188 | return r 189 | 190 | def _init(self, maxsize): 191 | self._queue = {} #collections.deque() 192 | def _init_dict(self,ori): 193 | if ori not in self._inernel_getters: 194 | self._inernel_getters[ori]=collections.deque() 195 | 196 | if ori not in self._queue: 197 | self._queue[ori]=collections.deque() 198 | 199 | def clean(self,ori): 200 | del self._inernel_getters[ori] 201 | del self._queue[ori] 202 | 203 | 204 | def _get(self,ori): 205 | return self._queue[ori].popleft() 206 | 207 | 208 | 209 | def _put(self, item): 210 | 211 | self._queue[item.ori].append(item) 212 | 213 | # End of the overridable methods. 214 | 215 | def _wakeup_next(self, waiters): 216 | # Wake up the next waiter (if any) that isn't cancelled. 217 | while waiters: 218 | waiter = waiters.popleft() 219 | if not waiter.done(): 220 | waiter.set_result(None) 221 | break 222 | 223 | def __repr__(self): 224 | return '<{} at {:#x} {}>'.format( 225 | type(self).__name__, id(self), self._format()) 226 | 227 | def __str__(self): 228 | return '<{} {}>'.format(type(self).__name__, self._format()) 229 | 230 | def _format(self): 231 | result = 'maxsize={!r}'.format(self._maxsize) 232 | if getattr(self, '_queue', None): 233 | result += ' _queue={!r}'.format(list(self._queue)) 234 | if self._inernel_getters: 235 | result += ' _getters[{}]'.format(len(self._inernel_getters)) 236 | if self._putters: 237 | result += ' _putters[{}]'.format(len(self._putters)) 238 | if self._unfinished_tasks: 239 | result += ' tasks={}'.format(self._unfinished_tasks) 240 | return result 241 | 242 | def qsize(self): 243 | """Number of items in the queue.""" 244 | size=0 245 | for k,v in self._queue.items(): 246 | size+=len(v) 247 | return size 248 | 249 | @property 250 | def maxsize(self): 251 | """Number of items allowed in the queue.""" 252 | return self._maxsize 253 | 254 | def empty(self,ori=None): 255 | 
"""Return True if the queue is empty, False otherwise.""" 256 | if ori: 257 | return not self._queue[ori] 258 | else: 259 | for k,v in self._queue.items(): 260 | if len(v) >0: 261 | return False 262 | return True 263 | 264 | def full(self): 265 | """Return True if there are maxsize items in the queue. 266 | 267 | Note: if the Queue was initialized with maxsize=0 (the default), 268 | then full() is never True. 269 | """ 270 | if self._maxsize <= 0: 271 | return False 272 | else: 273 | return self.qsize() >= self._maxsize 274 | 275 | 276 | async def put(self, item): 277 | """Put an item into the queue. 278 | 279 | Put an item into the queue. If the queue is full, wait until a free 280 | slot is available before adding item. 281 | 282 | This method is a coroutine. 283 | """ 284 | self._init_dict(item.ori) 285 | while self.full(): 286 | putter = self._loop.create_future() 287 | self._putters.append(putter) 288 | try: 289 | await putter 290 | except: 291 | putter.cancel() # Just in case putter is not done yet. 292 | if not self.full() and not putter.cancelled(): 293 | # We were woken up by get_nowait(), but can't take 294 | # the call. Wake up the next in line. 295 | self._wakeup_next(self._putters[item.ori]) 296 | raise 297 | return self.put_nowait(item) 298 | 299 | def put_nowait(self, item): 300 | """Put an item into the queue without blocking. 301 | 302 | If no free slot is immediately available, raise QueueFull. 303 | """ 304 | if self.full(): 305 | raise asyncio.QueueFull 306 | self._put(item) 307 | # self._unfinished_tasks += 1 308 | # self._finished.clear() 309 | self._wakeup_next(self._inernel_getters[item.ori]) 310 | 311 | async def readable(self,ori): 312 | 313 | while self.empty(ori): 314 | getter = self._loop.create_future() 315 | self._inernel_getters[ori].append(getter) 316 | try: 317 | await getter 318 | except: 319 | getter.cancel() # Just in case getter is not done yet. 
    async def get_by(self,ori):
        """Remove and return the oldest item queued under origin *ori*.

        If nothing is queued for *ori* yet, wait (via a per-origin
        getter future created in readable()) until such an item is put.

        This method is a coroutine.
        """
        self._init_dict(ori)
        await self.readable(ori)
        return self.get_nowait(ori)
class CachedQueue(asyncio.Queue):
    """An asyncio queue that additionally records every item put into it.

    The recorded history (``self.cache``) is what save_for_replay()
    persists; load_cache() refills a fresh queue from such a record when
    a run is replayed.
    """

    def __init__(self):
        self.last_put = None   # most recent item accepted by put()
        # NOTE(review): is_load is never set True in this file; it is
        # presumably flipped externally after load_cache() — confirm.
        self.is_load = False
        self.cache = []        # every item ever put, in order
        super().__init__(maxsize=128, loop=get_loop())
        QueueManager().add(self)

    def abandon(self):
        """Forget the recorded history (queued items are untouched)."""
        self.cache = []

    def load_cache(self, cache):
        """Refill the queue from a previously recorded item list."""
        for entry in cache:
            super().put_nowait(entry)
        self.cache = cache

    async def get(self):
        return await super().get()

    async def put(self, item):
        # A queue restored from a cache is read-only for producers.
        if self.is_load:
            raise Exception('can not put to a loaded queue')
        self.last_put = item
        await super().put(item)
        self.cache.append(item)
class Tee(Route):
    """Route that tees the stream into a side branch of nodes.

    Items entering the Tee are fed into the head of its own node chain
    (``self.args``); unless the Tee was created joined, the branch's
    final output is discarded into a SinkQueue while the main flow
    continues untouched.
    """

    def is_last_one(self, list, item):
        # NOTE(review): equality-compares against the final element and
        # shadows the builtin ``list`` — rename the parameter when this
        # is next touched.
        if item == list[-1]:
            return True
        else:
            return False

    def make_route_bot(self,iq,oq):
        """Wire the branch: one bot per entry in self.args.

        *iq*/*oq* are the enclosing pipe's queues; the branch starts
        from a fresh DataQueue so the main flow is not consumed.
        """
        q_o = DataQueue()
        self.outer_iq=iq
        self.outer_oq=oq

        self.start_q=[q_o]
        self.output_q=DataQueue()
        # if self.share:
        #     self.start_q.append(oq)
        for idx,func in enumerate(self.args):
            q_i = q_o
            if idx == len(self.args)-1:
                # Last node: deliver into output_q only when the branch
                # joins back; otherwise its result is dropped.
                if self.joined :
                    q_o = self.output_q
                else:
                    q_o = SinkQueue()
            else:
                q_o = DataQueue()

            BotFrame.make_bot(q_i, q_o, func)

    async def route_out(self):
        # Only meaningful for a joined Tee; an unjoined branch never
        # fills output_q.
        return await self.output_q.get()
self.start_q = [DataQueue()] 103 | self.output_q=oq 104 | 105 | 106 | def routein_out_q(self): 107 | return [self.route_target_q] 108 | def get_route_input_q_desc(self): 109 | return [self.outer_oq]+[self.route_target_q]+self.start_q 110 | flag=0 111 | async def route_in(self,data): 112 | 113 | await self.route_target_q.put(data) 114 | 115 | 116 | 117 | 118 | async def route_out(self): 119 | 120 | raise Exception("should not be called") 121 | 122 | 123 | 124 | 125 | 126 | class Zip(Route): 127 | 128 | def __init__(self, *args, merge_node=None): 129 | 130 | super(Route, self).__init__() 131 | 132 | self.route_type = [object] 133 | self.route_func = None 134 | self.args = args 135 | self.share=False 136 | self.joined=True 137 | self.loop=get_loop() 138 | 139 | self.databoard = Databoard() 140 | self.lock=asyncio.Event(loop=get_loop()) 141 | self.ori_list=DataQueue(maxsize=0) 142 | 143 | def routeout_in_q(self): 144 | r=super().routeout_in_q() 145 | r.append(self.ori_list) 146 | return r 147 | 148 | 149 | def make_route_bot(self,iq,oq): 150 | self.outer_iq=iq 151 | self.outer_oq=oq 152 | 153 | 154 | 155 | self.start_q = [] 156 | self.output_q = [] 157 | 158 | 159 | # self.output_q = q_o 160 | for func in self.args: 161 | q_i = DataQueue() 162 | q_o = ConditionalQueue() 163 | self.start_q.append(q_i) 164 | self.output_q.append(q_o) 165 | BotFrame.make_bot(q_i, q_o, func) 166 | 167 | 168 | 169 | 170 | async def route_in(self,bdata): 171 | 172 | await self.ori_list.put(bdata) 173 | 174 | new_bdata=Bdata(bdata.data,bdata) 175 | for q in self.start_q: 176 | await q.put(new_bdata) 177 | 178 | 179 | 180 | 181 | async def route_out(self): 182 | 183 | 184 | o=await self.ori_list.get() 185 | result=[] 186 | for q in self.output_q: 187 | try: 188 | r=await q.get_by(o) 189 | result.append(r.data) 190 | # task=self.loop.create_task(q.get_by(o)) 191 | # tasks.append(task) 192 | except Exception as e: 193 | logging.exception("excd") 194 | 195 | # r=await 
class Join(Route):
    """Route that runs several branches and joins their output back in.

    Without a *merge_node* the branches simply write into the outer
    output queue; with one, results are gathered per origin on the
    databoard and handed to the merge node.
    """

    def __init__(self, *args, merge_node=None):

        # NOTE(review): super(Route, self) skips Route.__init__ entirely
        # (it resolves to object.__init__) — the attributes below are
        # set by hand instead; confirm this is intentional.
        super(Route, self).__init__()

        self.route_type = [object]
        self.route_func = None
        self.merge_node = merge_node
        self.args = args
        self.databoard = Databoard()

    def make_route_bot(self,iq,oq):
        """Pick the plain-join or join-merge wiring based on merge_node."""
        self.outer_iq=iq
        self.outer_oq=oq

        if self.merge_node is None:
            self.make_route_bot_join(oq)
        else:
            self.make_route_bot_joinmerge(oq)

    async def route_in(self,bdata):
        if self.merge_node is None:
            # NOTE(review): ori.ori == 0 appears to detect a top-level
            # seed bdata (make_Bdata_zori) and rewraps it — confirm.
            if bdata.ori.ori ==0:
                new_data=Bdata(bdata.data, bdata)
                await super().route_in(new_data)
            else:
                await super().route_in(bdata)
        else:
            await self.route_in_joinmerge(bdata)

    def make_route_bot_join(self,oq):
        """Plain join: every branch writes straight into the outer queue."""
        self.share = False
        self.joined=True
        self.route_type=[object]

        # joined is always True here, so q_o is always oq; the SinkQueue
        # arm is dead but kept for symmetry with Tee.
        if self.joined:
            q_o = oq
        else:
            q_o = SinkQueue()

        self.start_q = []
        self.output_q = oq

        for func in self.args:
            q_i = DataQueue()
            self.start_q.append(q_i)
            BotFrame.make_bot(q_i, q_o, func)

    def make_route_bot_joinmerge(self, oq):
        """Join-merge: branches write to output_q, results are merged per origin."""
        self.start_q = []
        self.output_q = oq
        self.merge_q = DataQueue()
        self.inner_output_q = DataQueue()

        self.share = False
        self.joined = True
        self.raw_bdata = True
        self.count = 0

        for func in self.args:
            i_q = DataQueue()
            self.start_q.append(i_q)
            BotFrame.make_bot(i_q, self.output_q, func)

    async def route_in_joinmerge(self, bdata):
        """Route one item, wait for all branch results, feed the merge node."""
        # if bdata.is_BotControl():
        #     await super().route_in(bdata)
        #
        # else:

        data = Bdata(bdata.data, bdata)
        data.count = 0
        await super().route_in(data)

        # Block until the databoard has gathered every branch result for
        # this origin, then hand them to the merge node and release them.
        r = await self.databoard.wait_ori(bdata)
        await self.merge_node.put_result(r)
        self.databoard.drop_ori(bdata)
class RouteRule(object):
    """A single routing rule: which output queue receives which types.

    BUG FIX: ``loop`` was assigned in __init__ but missing from
    __slots__, so constructing a RouteRule always raised
    AttributeError.  It is now declared in the slot list.
    """
    __slots__ = ['output_q', 'type_list', 'share', 'loop']

    def __init__(self, output_q, types_list, share):
        self.output_q = output_q
        self.type_list = types_list
        self.share = share
        self.loop = get_loop()

    def is_match(self, o):
        """True when *o* is an instance of any type in this rule."""
        for t in self.type_list:
            if isinstance(o, t):
                return True

        return False

    def is_share(self):
        return self.share

    def __eq__(self, other):
        # Deliberately never equal so sorting falls through to __lt__.
        return False

    def __lt__(self, other):
        # Exclusive (non-shared) rules sort ahead of shared ones.
        if self.share == False:
            return True
        else:
            return False
62 | if not isinstance(v,list): 63 | return [v] 64 | else: 65 | return v 66 | class Route(object): 67 | 68 | def __init__(self, *args, route_type=object,route_func=None, share=True, join=False): 69 | 70 | self.name=str(self.__class__) 71 | self.in_table = RouteTable() 72 | self.out_table = RouteTable() 73 | self.args = args 74 | self.route_type=ensure_list(route_type) 75 | self.databoard = Databoard() 76 | self.share = share 77 | self.joined = join 78 | self.outer_iq=None 79 | self.outer_oq=None 80 | self.loop=get_loop() 81 | self.start_q=None 82 | self.output_q=None 83 | self.route_func=route_func 84 | if self.route_func is not None and not isinstance(self.route_func,typing.Callable): 85 | raise Exception('route_func not callable') 86 | 87 | 88 | 89 | if hasattr(self, 'route_type') and not isinstance(self.route_type, list): 90 | self.route_type = [self.route_type] 91 | 92 | 93 | def routeout_out_q(self): 94 | return [self.outer_oq] 95 | 96 | def routeout_in_q(self): 97 | qs=[] 98 | if isinstance(self.output_q,list): 99 | for q in self.output_q: 100 | if isinstance(self.output_q, queue.SinkQueue): 101 | continue 102 | qs.append(q) 103 | 104 | elif not isinstance(self.output_q, queue.SinkQueue): 105 | qs.append(self.output_q) 106 | 107 | return qs 108 | 109 | def routein_out_q(self): 110 | if self.share and not isinstance(self.outer_oq, queue.SinkQueue): 111 | return self.start_q+[self.outer_oq] 112 | else: 113 | return self.start_q 114 | 115 | def routein_in_q(self): 116 | return [self.outer_iq] 117 | 118 | 119 | 120 | async def _route_data(self,bdata): 121 | 122 | data=bdata.data 123 | # is_signal= bdata.is_BotControl() 124 | 125 | matched = self.type_match(data, self.route_type) and (self.route_func is None or self.route_func(data)) 126 | 127 | 128 | 129 | 130 | if self.share == True: 131 | bdata.incr() 132 | await self.outer_oq.put(bdata) 133 | else: 134 | if not matched: 135 | bdata.incr() 136 | await self.outer_oq.put(bdata) 137 | else: 138 | pass 139 | 140 | 
141 | if matched: 142 | bdata.incr(n=len(self.start_q)) 143 | for q in self.start_q: 144 | await q.put(bdata) 145 | 146 | 147 | async def __call__(self, data): 148 | 149 | if isinstance(data,list): 150 | for d in data: 151 | await self._route_data(data) 152 | else: 153 | await self._route_data(data) 154 | 155 | return 156 | 157 | async def route_in(self,data): 158 | if isinstance(data,list): 159 | for d in data: 160 | await self._route_data(data) 161 | else: 162 | await self._route_data(data) 163 | 164 | return 165 | 166 | async def route_out(self): 167 | 168 | # for q in self.output_q: 169 | if isinstance(self.output_q, queue.SinkQueue): 170 | return 171 | r=await self.output_q.get() 172 | return r 173 | 174 | 175 | def make_route_bot(self,iq,oq): 176 | '''the out side input and outpt queue''' 177 | raise NotImplementedError() 178 | 179 | 180 | @classmethod 181 | def type_match(cls, msg, type_list): 182 | for t in type_list: 183 | if isinstance(msg, t): 184 | return True 185 | 186 | return False 187 | 188 | 189 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = databot 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /docs/_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-minimal -------------------------------------------------------------------------------- /docs/bitcoin_arbitrage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kkyon/botflow/3c06afb8c9c8e0ac962bd2d157a79efcf05d913a/docs/bitcoin_arbitrage.png -------------------------------------------------------------------------------- /docs/bot/index.rst: -------------------------------------------------------------------------------- 1 | Bot 2 | ============ 3 | 4 | 5 | 6 | 7 | .. toctree:: 8 | :maxdepth: 1 9 | 10 | replay 11 | 12 | .. contents:: 13 | :local: 14 | 15 | 16 | Run 17 | --- 18 | 19 | Exception 20 | --------- 21 | 22 | 23 | Exception behavior will act according to the ```config.Exception_policy = config.Exception_raise ``` setting. 24 | 25 | :Exception_default: the default exception policy is raise 26 | :Exception_raise: raise the exception 27 | :Exception_ignore: ignore the exception. Exceptions raised from a node will be suppressed. 28 | :Exception_retry: the value will be put back into the input queue after some delay. 29 | :Exception_pipein: the exception is treated as a return value and put in the output queue. It is useful 30 | in blocked-join route scenarios. 31 | 32 | 33 | How to debug 34 | ------------ 35 | 36 | -------------------------------------------------------------------------------- /docs/bot/replay.rst: -------------------------------------------------------------------------------- 1 | replay 2 | ====== 3 | 4 | .. contents:: 5 | :local: 6 | 7 | 8 | When does it work? 9 | ------------- 10 | 11 | With ```config.replay_mode=True```, the Bot will turn on replay mode.
12 | When an exception is raised at step N, you don't need to run from step 1 to N. 13 | Botflow will replay the data from the nearest completed node, usually step N-1. 14 | It will save a lot of time in the development phase. 15 | 16 | There are two mandatory conditions for replay mode. 17 | 18 | #. An exception was raised. 19 | #. The node is completable. An unlimited stream pipe is unable to replay. 20 | 21 | In the example below, when the application is restarted after an exception was raised, the double function will not be 22 | executed any more. 23 | 24 | .. code-block:: python 25 | 26 | from Botflow.flow import Pipe, Loop, Fork 27 | from Botflow.botframe import BotFrame 28 | from Botflow.config import config 29 | import time 30 | 31 | def double(a): 32 | print('double %d'%a) 33 | time.sleep(1) 34 | return 2*a 35 | 36 | 37 | count=0 38 | def triple(a): 39 | global count 40 | count+=1 41 | if count>6: 42 | raise Exception() 43 | pass 44 | return 3*a 45 | 46 | # 47 | config.replay_mode=True 48 | 49 | def main(): 50 | Pipe( 51 | 52 | Loop(range(10)), 53 | double, 54 | triple, 55 | print 56 | 57 | 58 | ) 59 | 60 | BotFrame.run() 61 | 62 | 63 | 64 | main() 65 | 66 | 67 | 68 | How does it work? 69 | ------------ 70 | When an exception is raised, botframe will dump the cached data of the nearest completed node to disk. 71 | It will restore from disk on restart. And the completed nodes of the previous run will not be started 72 | again. 73 | 74 | .. warning:: 75 | 76 | In the current version 0.1.7, in replay mode the data is cached in RAM, 77 | so do not use it when processing big data.
78 | 79 | 80 | 81 | 82 | 83 | 84 | -------------------------------------------------------------------------------- /docs/bot/run.rst: -------------------------------------------------------------------------------- 1 | Run Render 2 | =========== -------------------------------------------------------------------------------- /docs/change/0.1.8.rst: -------------------------------------------------------------------------------- 1 | Version 0.1.8 2 | ============= 3 | 4 | 5 | 6 | 7 | 8 | #. Loop Node is deprecated .Basic value will work as Node.for Iterable (list, generator ,range, except tuple) will be looped like Loop node.for int,str,dict,will be put into queue directly. 9 | 10 | #. New Filter Node support.add route_func support to All Route. it will support by route with funct. 11 | 12 | .. code-block:: python 13 | 14 | Pipe( 15 | 16 | [A(),B(),C()], 17 | Filter(route_type=[A,B],route_func=lambda r:isinstance(r,(A,C))), 18 | self.only_a 19 | 20 | ) 21 | 22 | 23 | #. Add Httprequest support for post,put option, 24 | 25 | #. Add Node.boost decorators. it will parallel function in thread pool .it is good option for some slow funciton. 26 | 27 | .. code-block:: python 28 | 29 | @Node.boost 30 | def very_slow(a): 31 | import time 32 | time.sleep(10) -------------------------------------------------------------------------------- /docs/change/0.1.9.rst: -------------------------------------------------------------------------------- 1 | Botflow release v0.1.9 with Http Server support 2 | =============================================== 3 | 4 | 5 | #. Officially rename project to Botflow. 6 | 7 | #. Enable Http Server support.Pipe can be work as Coroutine to integrate with other Asyncio framework. 8 | 9 | .. 
code-block:: python 10 | 11 | from botflow import * 12 | from aiohttp import web 13 | 14 | p = Pipe( 15 | {"msg":"hello world!"} 16 | ) 17 | app = web.Application() 18 | 19 | app.add_routes([ 20 | web.get('/', p.aiohttp_json_handle) 21 | ]) 22 | 23 | Bot.run_app(app) 24 | 25 | 26 | #. Add new Route "SendTo". It can be used for redirect to data flow to the Node. 27 | 28 | .. code-block:: python 29 | 30 | def filter_out(url): 31 | global count 32 | if 'http' not in url: 33 | url = "http://127.0.0.1:8080{}".format(url) 34 | 35 | if url in seen : #filter out processed links 36 | return None 37 | seen.add(url) 38 | return url 39 | 40 | 41 | def find_all_links(r): 42 | for a in r.soup.find_all('a', href=True): 43 | yield a.get('href') 44 | 45 | 46 | 47 | 48 | b = Return( 49 | 50 | filter_out, #filter out processed links 51 | HttpLoader(), 52 | find_all_links, #find all links in new page. 53 | ) 54 | 55 | Pipe( 56 | "http://127.0.0.1:8080/", 57 | b, 58 | SendTo(b), #send new url to process again . it will make a loop 59 | 60 | ) 61 | 62 | #. Add new Node type "SpeedLimit" "Delay" .For speed control 63 | 64 | #. Add new Node type "Zip". For zip multi flow item to list. 65 | 66 | #. Rewrite whole project for code more readable. 67 | 68 | #. Import flow performance by optimize graph generate algorithm . 69 | 70 | #. Import crawler case performance. For local server broad crawler bench test. 71 | it can reach 760 pages per second. 10x faster than Scrapy. -------------------------------------------------------------------------------- /docs/change/0.2.0.rst: -------------------------------------------------------------------------------- 1 | 0.2.0 Milestone release.: 2 | =============================== 3 | 4 | #. Jupyter support. Able to run inside Jupyter note book. 5 | 6 | #. pipe can be nest in another Pipe. 7 | 8 | p1=Pipe(get_image) 9 | p2=Pipe(get_price) 10 | p_get_all=Pipe(Zip(p1,p2)).Filter 11 | 12 | #. Support Chain style pipe line creating. 
13 | 14 | Pipe(range(1,10)).Map(lambda x:x+1).Fiter(lambda x:x>2) 15 | 16 | same as : 17 | 18 | Pipe(range(1,10),lambda x:x+1,Filter(lambda x:x>2)) -------------------------------------------------------------------------------- /docs/change/index.rst: -------------------------------------------------------------------------------- 1 | Change 2 | ------ 3 | 4 | .. toctree:: 5 | :maxdepth: 1 6 | 7 | 0.2.0 8 | 0.1.9 9 | 0.1.8 -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Configuration file for the Sphinx documentation builder. 4 | # 5 | # This file does only contain a selection of the most common options. For a 6 | # full list see the documentation: 7 | # http://www.sphinx-doc.org/en/master/config 8 | 9 | # -- Path setup -------------------------------------------------------------- 10 | 11 | # If extensions (or modules to document with autodoc) are in another directory, 12 | # add these directories to sys.path here. If the directory is relative to the 13 | # documentation root, use os.path.abspath to make it absolute, like shown here. 14 | # 15 | # import os 16 | # import sys 17 | # sys.path.insert(0, os.path.abspath('.')) 18 | 19 | 20 | # -- Project information ----------------------------------------------------- 21 | 22 | project = 'Botflow' 23 | copyright = '2018, kkyon' 24 | author = 'kkyon' 25 | 26 | # The short X.Y version 27 | version = '0.1.9' 28 | # The full version, including alpha/beta/rc tags 29 | release = 'alpha' 30 | 31 | 32 | # -- General configuration --------------------------------------------------- 33 | 34 | # If your documentation needs a minimal Sphinx version, state it here. 35 | # 36 | # needs_sphinx = '1.0' 37 | 38 | # Add any Sphinx extension module names here, as strings. They can be 39 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 40 | # ones. 
41 | extensions = [ 42 | ] 43 | 44 | # Add any paths that contain templates here, relative to this directory. 45 | templates_path = ['_templates'] 46 | 47 | # The suffix(es) of source filenames. 48 | # You can specify multiple suffix as a list of string: 49 | # 50 | # source_suffix = ['.rst', '.md'] 51 | source_suffix = '.rst' 52 | 53 | # The master toctree document. 54 | master_doc = 'index' 55 | 56 | # The language for content autogenerated by Sphinx. Refer to documentation 57 | # for a list of supported languages. 58 | # 59 | # This is also used if you do content translation via gettext catalogs. 60 | # Usually you set "language" from the command line for these cases. 61 | language = None 62 | 63 | # List of patterns, relative to source directory, that match files and 64 | # directories to ignore when looking for source files. 65 | # This pattern also affects html_static_path and html_extra_path . 66 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 67 | 68 | # The name of the Pygments (syntax highlighting) style to use. 69 | pygments_style = 'sphinx' 70 | 71 | 72 | # -- Options for HTML output ------------------------------------------------- 73 | 74 | # The theme to use for HTML and HTML Help pages. See the documentation for 75 | # a list of builtin themes. 76 | # 77 | html_theme = 'alabaster' 78 | 79 | # Theme options are theme-specific and customize the look and feel of a theme 80 | # further. For a list of options available for each theme, see the 81 | # documentation. 82 | # 83 | # html_theme_options = {} 84 | 85 | # Add any paths that contain custom static files (such as style sheets) here, 86 | # relative to this directory. They are copied after the builtin static files, 87 | # so a file named "default.css" will overwrite the builtin "default.css". 88 | html_static_path = ['_static'] 89 | 90 | # Custom sidebar templates, must be a dictionary that maps document names 91 | # to template names. 
92 | # 93 | # The default sidebars (for documents that don't match any pattern) are 94 | # defined by theme itself. Builtin themes are using these templates by 95 | # default: ``['localtoc.html', 'relations.html', 'sourcelink.html', 96 | # 'searchbox.html']``. 97 | # 98 | # html_sidebars = {} 99 | 100 | 101 | # -- Options for HTMLHelp output --------------------------------------------- 102 | 103 | # Output file base name for HTML help builder. 104 | htmlhelp_basename = 'botflowdoc' 105 | 106 | 107 | # -- Options for LaTeX output ------------------------------------------------ 108 | 109 | latex_elements = { 110 | # The paper size ('letterpaper' or 'a4paper'). 111 | # 112 | # 'papersize': 'letterpaper', 113 | 114 | # The font size ('10pt', '11pt' or '12pt'). 115 | # 116 | # 'pointsize': '10pt', 117 | 118 | # Additional stuff for the LaTeX preamble. 119 | # 120 | # 'preamble': '', 121 | 122 | # Latex figure (float) alignment 123 | # 124 | # 'figure_align': 'htbp', 125 | } 126 | 127 | # Grouping the document tree into LaTeX files. List of tuples 128 | # (source start file, target name, title, 129 | # author, documentclass [howto, manual, or own class]). 130 | latex_documents = [ 131 | (master_doc, 'botflow.tex', 'botflow Documentation', 132 | 'kkyon', 'manual'), 133 | ] 134 | 135 | 136 | # -- Options for manual page output ------------------------------------------ 137 | 138 | # One entry per manual page. List of tuples 139 | # (source start file, name, description, authors, manual section). 140 | man_pages = [ 141 | (master_doc, 'botflow', 'botflow Documentation', 142 | [author], 1) 143 | ] 144 | 145 | 146 | # -- Options for Texinfo output ---------------------------------------------- 147 | 148 | # Grouping the document tree into Texinfo files. 
List of tuples 149 | # (source start file, target name, title, author, 150 | # dir menu entry, description, category) 151 | texinfo_documents = [ 152 | (master_doc, 'botflow', 'botflow Documentation', 153 | author, 'botflow', 'One line description of project.', 154 | 'Miscellaneous'), 155 | ] 156 | -------------------------------------------------------------------------------- /docs/examples/index.rst: -------------------------------------------------------------------------------- 1 | Example (案例) 2 | ------------- 3 | 4 | .. toctree:: 5 | :maxdepth: 1 6 | 7 | lagou_spider -------------------------------------------------------------------------------- /docs/examples/lagou1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kkyon/botflow/3c06afb8c9c8e0ac962bd2d157a79efcf05d913a/docs/examples/lagou1.jpg -------------------------------------------------------------------------------- /docs/examples/lagou2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kkyon/botflow/3c06afb8c9c8e0ac962bd2d157a79efcf05d913a/docs/examples/lagou2.jpg -------------------------------------------------------------------------------- /docs/examples/lagou3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kkyon/botflow/3c06afb8c9c8e0ac962bd2d157a79efcf05d913a/docs/examples/lagou3.jpg -------------------------------------------------------------------------------- /docs/examples/lagou5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kkyon/botflow/3c06afb8c9c8e0ac962bd2d157a79efcf05d913a/docs/examples/lagou5.jpg -------------------------------------------------------------------------------- /docs/examples/lagou_spider.rst: -------------------------------------------------------------------------------- 1 | 
lagou_spider(拉勾职位爬虫) 2 | ========================== 3 | 4 | 假设你接到一个需要,让你利用拉勾的招聘JD,调查Python程序在北上深杭的薪资情况。需要你写个爬虫来完成。 5 | 6 | 本文需要工具 7 | ----------- 8 | 9 | :Chrome: 用于网页请求包抓取 10 | 11 | :Botflow: 用于爬虫编写 12 | 13 | :Pycharm CE: 用于代码编程。 14 | 15 | 16 | 如何安装Botflow: 17 | --------------- 18 | ```pip install -U botflow``` 19 | 20 | 21 | 22 | 使用chrome获取请求内容 23 | -------------------- 24 | 25 | #. 打开chrome 开发者工作, 26 | 27 | #. 切换到 XHR ,这种模式只会抓取 Ajax请求 28 | 29 | #. 在拉勾网页上搜索,北京地区的,python岗位,然后翻页。在开发者工具中,你就可以看到,positionAjax.json 相关的请求。 30 | 31 | .. image:: lagou1.jpg 32 | 33 | .. image:: lagou2.jpg 34 | 35 | #. 右键选中,Copy > Copy request headers. 36 | 37 | .. code-block:: Text 38 | 39 | POST /jobs/positionAjax.json?city=%E5%8C%97%E4%BA%AC&needAddtionalResult=false HTTP/1.1 40 | Host: www.lagou.com 41 | Connection: keep-alive 42 | Content-Length: 26 43 | Pragma: no-cache 44 | Cache-Control: no-cache 45 | Origin: https://www.lagou.com 46 | X-Anit-Forge-Code: 0 47 | User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36 48 | Content-Type: application/x-www-form-urlencoded; charset=UTF-8 49 | Accept: application/json, text/javascript, */*; q=0.01 50 | X-Requested-With: XMLHttpRequest 51 | X-Anit-Forge-Token: None 52 | Referer: https://www.lagou.com/jobs/list_python?labelWords=&fromSearch=true&suginput= 53 | Accept-Encoding: gzip, deflate, br 54 | Accept-Language: zh-CN,zh;q=0.9,en;q=0.8,zh-TW;q=0.7 55 | Cookie: _ga=GA1.2.1690296475.1518425441; user_trace_token=20180212165041-cde58656-0fd1-11e8-8654-525400f775ce; LGUID=20180212165041-cde58a1b-0fd1-11e8-8654-525400f775ce; gray=resume; 
sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%228310232%22%2C%22%24device_id%22%3A%22162fbe017557-0454a54748a333-336c7b05-1764000-162fbe01756131a%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E7%9B%B4%E6%8E%A5%E6%B5%81%E9%87%8F%22%2C%22%24latest_referrer%22%3A%22%22%2C%22%24latest_referrer_host%22%3A%22%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC_%E7%9B%B4%E6%8E%A5%E6%89%93%E5%BC%80%22%7D%2C%22first_id%22%3A%22162fbe017557-0454a54748a333-336c7b05-1764000-162fbe01756131a%22%7D; LG_LOGIN_USER_ID=2c307fca8f7021f2bcb48e09504f48685acb6eac181de81c; showExpriedIndex=1; showExpriedCompanyHome=1; showExpriedMyPublish=1; hasDeliver=0; JSESSIONID=ABAAABAABEEAAJAE07FBC7D4DF87B66F2D4043894D889A7; _putrc=6371D2A6DE42118C; login=true; mds_u_n=%5Cu8d1d%5Cu58f3%5Cu91d1%5Cu63a7%5Cu5317%5Cu4eac; mds_u_ci=159558; X_HTTP_TOKEN=ee02191ab39aeafe8221a97bdbc5d06f; Hm_lvt_4233e74dff0ae5bd0a3d81c6ccf756e6=1534149795,1534919704; yun_switch_company=1; _gid=GA1.2.236062443.1536480058; LGSID=20180909160058-7c6bb276-b406-11e8-b62b-5254005c3644; PRE_UTM=; PRE_HOST=; PRE_SITE=; PRE_LAND=https%3A%2F%2Fwww.lagou.com%2F; gate_login_token=cf101a24f676e9e598be30fb739271f3ff8e76bfbec59f83; mds_u_cn=%5Cu8d1d%5Cu58f3%5Cu91d1%5Cu79d1%5Cu63a7%5Cu80a1%5Cu6709%5Cu9650%5Cu516c%5Cu53f8; mds_u_s_cn=%5Cu8d1d%5Cu58f3%5Cu91d1%5Cu670d; unick=18800118876; yun_push_authToken="JZfh+Grg9GvCxIVJVfHfI8KjD/G0bxVo0HtuvLII5/Xm2NlGNg1UsHxxDJqQFc9fnGoyeBZvhUfuvY8Mto3upBWRPaYAk7heQlCWp63hWDaDJ2uyAFAq3DTK7j24YxX7e7g1RVQvFTZW5KbQFd0sMQxCv4X/4NwrwsS/v5aXU+x4rucJXOpldXhUiavxhcCELWDotJ+bmNVwmAvQCptcy5e7czUcjiQC32Lco44BMYXrQ+AIOfEccJKHpj0vJ+ngq/27aqj1hWq8tEPFFjdnxMSfKgAnjbIEAX3F9CIW8BSiMHYmPBt7FDDY0CCVFICHr2dp5gQVGvhfbqg7VzvNsw=="; 
mds_login_authToken="OLzLRoAc/eD6AABAewCWpbIJ/eu8qBHKDjhbdDnbV9JdlQrgjiZ+LWjOaziTjnZP77x9C5OlLk4DFgWJFQ8ekmw27Av1P+GjJ8WNV82JuqEuRpQKnHSmTWaLosUgWRvrYi7/C/KiuNlQWdVzDdRn2Wir0LqzdL/PqN28NmThSXd4rucJXOpldXhUiavxhcCELWDotJ+bmNVwmAvQCptcy5e7czUcjiQC32Lco44BMYXrQ+AIOfEccJKHpj0vJ+ngq/27aqj1hWq8tEPFFjdnxMSfKgAnjbIEAX3F9CIW8BSiMHYmPBt7FDDY0CCVFICHr2dp5gQVGvhfbqg7VzvNsw=="; index_location_city=%E5%85%A8%E5%9B%BD; TG-TRACK-CODE=search_code; SEARCH_ID=dce8f7fa57814b2ba25a330df122cce5; Hm_lpvt_4233e74dff0ae5bd0a3d81c6ccf756e6=1536480724; LGRID=20180909161204-09522e65-b408-11e8-b62b-5254005c3644; _ga=GA1.2.1690296475.1518425441; user_trace_token=20180212165041-cde58656-0fd1-11e8-8654-525400f775ce; LGUID=20180212165041-cde58a1b-0fd1-11e8-8654-525400f775ce; gray=resume; sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%228310232%22%2C%22%24device_id%22%3A%22162fbe017557-0454a54748a333-336c7b05-1764000-162fbe01756131a%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E7%9B%B4%E6%8E%A5%E6%B5%81%E9%87%8F%22%2C%22%24latest_referrer%22%3A%22%22%2C%22%24latest_referrer_host%22%3A%22%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC_%E7%9B%B4%E6%8E%A5%E6%89%93%E5%BC%80%22%7D%2C%22first_id%22%3A%22162fbe017557-0454a54748a333-336c7b05-1764000-162fbe01756131a%22%7D; showExpriedIndex=1; showExpriedCompanyHome=1; showExpriedMyPublish=1; hasDeliver=0; JSESSIONID=ABAAABAABEEAAJAE07FBC7D4DF87B66F2D4043894D889A7; mds_u_n=%5Cu8d1d%5Cu58f3%5Cu91d1%5Cu63a7%5Cu5317%5Cu4eac; mds_u_ci=159558; X_HTTP_TOKEN=ee02191ab39aeafe8221a97bdbc5d06f; Hm_lvt_4233e74dff0ae5bd0a3d81c6ccf756e6=1534149795,1534919704; yun_switch_company=1; _gid=GA1.2.236062443.1536480058; mds_u_cn=%5Cu8d1d%5Cu58f3%5Cu91d1%5Cu79d1%5Cu63a7%5Cu80a1%5Cu6709%5Cu9650%5Cu516c%5Cu53f8; mds_u_s_cn=%5Cu8d1d%5Cu58f3%5Cu91d1%5Cu670d; LGSID=20180910110134-d3475ab4-b4a5-11e8-b62b-5254005c3644; PRE_UTM=; PRE_HOST=; PRE_SITE=; PRE_LAND=https%3A%2F%2Fwww.lagou.com%2F; 
gate_login_token=21d2ec82a5c90746ab0a09e3014903592062af3c2a0e44a3; yun_push_authToken="K3X2PunJpOPcH6Jps7SH2GIRGCFgFXxWS73fruNGbe5BLuIDysbZl2SSsqWlCQOmGY8KGQl8UWoJ08WEvZtQHQd2R4sMO1Q6skl77olVWj4P6T4vls04NALTvIJxMxC4zd9CJ+eIEJBemWIFuPIzMIQenQ3GU870INQTDV8C2xh4rucJXOpldXhUiavxhcCELWDotJ+bmNVwmAvQCptcy5e7czUcjiQC32Lco44BMYXrQ+AIOfEccJKHpj0vJ+ngq/27aqj1hWq8tEPFFjdnxMSfKgAnjbIEAX3F9CIW8BSiMHYmPBt7FDDY0CCVFICHr2dp5gQVGvhfbqg7VzvNsw=="; login=false; unick=""; mds_login_authToken=""; _putrc=""; LG_LOGIN_USER_ID=""; index_location_city=%E5%8C%97%E4%BA%AC; TG-TRACK-CODE=index_search; SEARCH_ID=3658325e35c04672893f1f53b3b929da; Hm_lpvt_4233e74dff0ae5bd0a3d81c6ccf756e6=1536548672; LGRID=20180910110432-3dbd334a-b4a6-11e8-b62b-5254005c3644 56 | 57 | #. Query String内容保存下来 58 | 59 | .. code-block:: Text 60 | 61 | city=%E5%8C%97%E4%BA%AC&needAddtionalResult=false 62 | 63 | #. Form Data内容保持下来 64 | .. code-block:: Text 65 | 66 | first=false&pn=3&kd=python 67 | 68 | 69 | ##所有需要抓取的信息都在这里。##下面就可以开始撸代码了。## 70 | 71 | 72 | 第一版本: 73 | -------- 74 | 使用pip安装,确保你用的python3.6 有的同学可能需要pip3命令。 75 | ```pip install -U botflow``` or ```pip3 install -U botflow``` 76 | 77 | .. 
code-block:: python 78 | 79 | from botflow import * 80 | 81 | request_headers=""" 82 | POST /jobs/positionAjax.json?city=%E5%8C%97%E4%BA%AC&needAddtionalResult=false HTTP/1.1 83 | Host: www.lagou.com 84 | Connection: keep-alive 85 | Content-Length: 26 86 | Pragma: no-cache 87 | Cache-Control: no-cache 88 | Origin: https://www.lagou.com 89 | X-Anit-Forge-Code: 0 90 | User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36 91 | Content-Type: application/x-www-form-urlencoded; charset=UTF-8 92 | Accept: application/json, text/javascript, */*; q=0.01 93 | X-Requested-With: XMLHttpRequest 94 | X-Anit-Forge-Token: None 95 | Referer: https://www.lagou.com/jobs/list_python?labelWords=&fromSearch=true&suginput= 96 | Accept-Encoding: gzip, deflate, br 97 | Accept-Language: zh-CN,zh;q=0.9,en;q=0.8,zh-TW;q=0.7 98 | Cookie: _ga=GA1.2.1690296475.1518425441; user_trace_token=20180212165041-cde58656-0fd1-11e8-8654-525400f775ce; LGUID=20180212165041-cde58a1b-0fd1-11e8-8654-525400f775ce; gray=resume; sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%228310232%22%2C%22%24device_id%22%3A%22162fbe017557-0454a54748a333-336c7b05-1764000-162fbe01756131a%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E7%9B%B4%E6%8E%A5%E6%B5%81%E9%87%8F%22%2C%22%24latest_referrer%22%3A%22%22%2C%22%24latest_referrer_host%22%3A%22%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC_%E7%9B%B4%E6%8E%A5%E6%89%93%E5%BC%80%22%7D%2C%22first_id%22%3A%22162fbe017557-0454a54748a333-336c7b05-1764000-162fbe01756131a%22%7D; LG_LOGIN_USER_ID=2c307fca8f7021f2bcb48e09504f48685acb6eac181de81c; showExpriedIndex=1; showExpriedCompanyHome=1; showExpriedMyPublish=1; hasDeliver=0; JSESSIONID=ABAAABAABEEAAJAE07FBC7D4DF87B66F2D4043894D889A7; _putrc=6371D2A6DE42118C; login=true; mds_u_n=%5Cu8d1d%5Cu58f3%5Cu91d1%5Cu63a7%5Cu5317%5Cu4eac; mds_u_ci=159558; X_HTTP_TOKEN=ee02191ab39aeafe8221a97bdbc5d06f; 
Hm_lvt_4233e74dff0ae5bd0a3d81c6ccf756e6=1534149795,1534919704; yun_switch_company=1; _gid=GA1.2.236062443.1536480058; LGSID=20180909160058-7c6bb276-b406-11e8-b62b-5254005c3644; PRE_UTM=; PRE_HOST=; PRE_SITE=; PRE_LAND=https%3A%2F%2Fwww.lagou.com%2F; gate_login_token=cf101a24f676e9e598be30fb739271f3ff8e76bfbec59f83; mds_u_cn=%5Cu8d1d%5Cu58f3%5Cu91d1%5Cu79d1%5Cu63a7%5Cu80a1%5Cu6709%5Cu9650%5Cu516c%5Cu53f8; mds_u_s_cn=%5Cu8d1d%5Cu58f3%5Cu91d1%5Cu670d; unick=18800118876; yun_push_authToken="JZfh+Grg9GvCxIVJVfHfI8KjD/G0bxVo0HtuvLII5/Xm2NlGNg1UsHxxDJqQFc9fnGoyeBZvhUfuvY8Mto3upBWRPaYAk7heQlCWp63hWDaDJ2uyAFAq3DTK7j24YxX7e7g1RVQvFTZW5KbQFd0sMQxCv4X/4NwrwsS/v5aXU+x4rucJXOpldXhUiavxhcCELWDotJ+bmNVwmAvQCptcy5e7czUcjiQC32Lco44BMYXrQ+AIOfEccJKHpj0vJ+ngq/27aqj1hWq8tEPFFjdnxMSfKgAnjbIEAX3F9CIW8BSiMHYmPBt7FDDY0CCVFICHr2dp5gQVGvhfbqg7VzvNsw=="; mds_login_authToken="OLzLRoAc/eD6AABAewCWpbIJ/eu8qBHKDjhbdDnbV9JdlQrgjiZ+LWjOaziTjnZP77x9C5OlLk4DFgWJFQ8ekmw27Av1P+GjJ8WNV82JuqEuRpQKnHSmTWaLosUgWRvrYi7/C/KiuNlQWdVzDdRn2Wir0LqzdL/PqN28NmThSXd4rucJXOpldXhUiavxhcCELWDotJ+bmNVwmAvQCptcy5e7czUcjiQC32Lco44BMYXrQ+AIOfEccJKHpj0vJ+ngq/27aqj1hWq8tEPFFjdnxMSfKgAnjbIEAX3F9CIW8BSiMHYmPBt7FDDY0CCVFICHr2dp5gQVGvhfbqg7VzvNsw=="; index_location_city=%E5%85%A8%E5%9B%BD; TG-TRACK-CODE=search_code; SEARCH_ID=dce8f7fa57814b2ba25a330df122cce5; Hm_lpvt_4233e74dff0ae5bd0a3d81c6ccf756e6=1536480724; LGRID=20180909161204-09522e65-b408-11e8-b62b-5254005c3644; _ga=GA1.2.1690296475.1518425441; user_trace_token=20180212165041-cde58656-0fd1-11e8-8654-525400f775ce; LGUID=20180212165041-cde58a1b-0fd1-11e8-8654-525400f775ce; gray=resume; 
sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%228310232%22%2C%22%24device_id%22%3A%22162fbe017557-0454a54748a333-336c7b05-1764000-162fbe01756131a%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E7%9B%B4%E6%8E%A5%E6%B5%81%E9%87%8F%22%2C%22%24latest_referrer%22%3A%22%22%2C%22%24latest_referrer_host%22%3A%22%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC_%E7%9B%B4%E6%8E%A5%E6%89%93%E5%BC%80%22%7D%2C%22first_id%22%3A%22162fbe017557-0454a54748a333-336c7b05-1764000-162fbe01756131a%22%7D; showExpriedIndex=1; showExpriedCompanyHome=1; showExpriedMyPublish=1; hasDeliver=0; JSESSIONID=ABAAABAABEEAAJAE07FBC7D4DF87B66F2D4043894D889A7; mds_u_n=%5Cu8d1d%5Cu58f3%5Cu91d1%5Cu63a7%5Cu5317%5Cu4eac; mds_u_ci=159558; X_HTTP_TOKEN=ee02191ab39aeafe8221a97bdbc5d06f; Hm_lvt_4233e74dff0ae5bd0a3d81c6ccf756e6=1534149795,1534919704; yun_switch_company=1; _gid=GA1.2.236062443.1536480058; mds_u_cn=%5Cu8d1d%5Cu58f3%5Cu91d1%5Cu79d1%5Cu63a7%5Cu80a1%5Cu6709%5Cu9650%5Cu516c%5Cu53f8; mds_u_s_cn=%5Cu8d1d%5Cu58f3%5Cu91d1%5Cu670d; LGSID=20180910110134-d3475ab4-b4a5-11e8-b62b-5254005c3644; PRE_UTM=; PRE_HOST=; PRE_SITE=; PRE_LAND=https%3A%2F%2Fwww.lagou.com%2F; gate_login_token=21d2ec82a5c90746ab0a09e3014903592062af3c2a0e44a3; yun_push_authToken="K3X2PunJpOPcH6Jps7SH2GIRGCFgFXxWS73fruNGbe5BLuIDysbZl2SSsqWlCQOmGY8KGQl8UWoJ08WEvZtQHQd2R4sMO1Q6skl77olVWj4P6T4vls04NALTvIJxMxC4zd9CJ+eIEJBemWIFuPIzMIQenQ3GU870INQTDV8C2xh4rucJXOpldXhUiavxhcCELWDotJ+bmNVwmAvQCptcy5e7czUcjiQC32Lco44BMYXrQ+AIOfEccJKHpj0vJ+ngq/27aqj1hWq8tEPFFjdnxMSfKgAnjbIEAX3F9CIW8BSiMHYmPBt7FDDY0CCVFICHr2dp5gQVGvhfbqg7VzvNsw=="; login=false; unick=""; mds_login_authToken=""; _putrc=""; LG_LOGIN_USER_ID=""; index_location_city=%E5%8C%97%E4%BA%AC; TG-TRACK-CODE=index_search; SEARCH_ID=3658325e35c04672893f1f53b3b929da; Hm_lpvt_4233e74dff0ae5bd0a3d81c6ccf756e6=1536548672; LGRID=20180910110432-3dbd334a-b4a6-11e8-b62b-5254005c3644 99 | """ 100 | 101 | payload="first=false&pn=3&kd=python" 102 | 103 | 
url= "https://www.lagou.com/jobs/positionAjax.json?city=%E5%8C%97%E4%BA%AC&needAddtionalResult=false" 104 | 105 | p=Pipe( 106 | 107 | HttpRequest( 108 | request_headers=request_headers, 109 | url = url, 110 | payload = payload, 111 | 112 | ), #构造请求 113 | HttpLoader(), #发送请求 114 | 115 | ) 116 | 117 | Bot.run() 118 | 119 | 120 | 121 | .. image:: lagou3.jpg 122 | 123 | 如果看到以上输出,恭喜你。你完成大概70%。后面就完成, 124 | #. 如何翻页和切换城市, 125 | #. 还有解析输出的json格式。 126 | 127 | 128 | 最终版: 129 | ====== 130 | 131 | 如果是正常的python代码,我们一般这样写用来获取全部城市和页面。 132 | 133 | .. code-block:: python 134 | 135 | for city in ['北京','上海','深圳'] : 136 | for page_no in range(1,30): 137 | print(city,page_no) 138 | 139 | 但多重循环必须要顺序执行(串行),使用Botflow就要消除多重循环,并且可以并发执行。所以 140 | 使用itertools.product来生成所有组合。 141 | 142 | 143 | .. code-block:: python 144 | 145 | from itertools import product 146 | product(['北京','上海','深圳'],range(1,30)) 147 | 148 | 149 | 所以我们的代码可以改成, 150 | 151 | .. code-block:: python 152 | 153 | p=Pipe( 154 | product(['北京','上海','深圳'],range(1,30)), 155 | lambda c_p:HttpRequest( 156 | request_headers=request_headers, 157 | url = f"https://www.lagou.com/jobs/positionAjax.json?city={c_p[0]}&needAddtionalResult=false", 158 | payload = f"first=false&pn={c_p[1]}&kd=python", 159 | 160 | ), #构造请求 161 | 162 | 163 | ) 164 | 165 | Bot.run() 166 | 167 | 以上使用了python lambda ,和f-string 。product产生了所有组合, 在后续lambda表达式中,替换生成响应HTTP请求。 168 | 169 | 我们已经能取到全部的网页。现在加上解析函数 def parse() 和输出到函数的操作就可以完成全部任务: 170 | 171 | .. 
code-block:: python 172 | 173 | from botflow import * 174 | from botflow import HttpRequest 175 | from itertools import product 176 | 177 | 178 | 179 | request_headers=""" 180 | POST /jobs/positionAjax.json?city=%E5%8C%97%E4%BA%AC&needAddtionalResult=false HTTP/1.1 181 | Host: www.lagou.com 182 | Connection: keep-alive 183 | Content-Length: 26 184 | Pragma: no-cache 185 | Cache-Control: no-cache 186 | Origin: https://www.lagou.com 187 | X-Anit-Forge-Code: 0 188 | User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36 189 | Content-Type: application/x-www-form-urlencoded; charset=UTF-8 190 | Accept: application/json, text/javascript, */*; q=0.01 191 | X-Requested-With: XMLHttpRequest 192 | X-Anit-Forge-Token: None 193 | Referer: https://www.lagou.com/jobs/list_python?labelWords=&fromSearch=true&suginput= 194 | Accept-Encoding: gzip, deflate, br 195 | Accept-Language: zh-CN,zh;q=0.9,en;q=0.8,zh-TW;q=0.7 196 | Cookie: _ga=GA1.2.1690296475.1518425441; user_trace_token=20180212165041-cde58656-0fd1-11e8-8654-525400f775ce; LGUID=20180212165041-cde58a1b-0fd1-11e8-8654-525400f775ce; gray=resume; sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%228310232%22%2C%22%24device_id%22%3A%22162fbe017557-0454a54748a333-336c7b05-1764000-162fbe01756131a%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E7%9B%B4%E6%8E%A5%E6%B5%81%E9%87%8F%22%2C%22%24latest_referrer%22%3A%22%22%2C%22%24latest_referrer_host%22%3A%22%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC_%E7%9B%B4%E6%8E%A5%E6%89%93%E5%BC%80%22%7D%2C%22first_id%22%3A%22162fbe017557-0454a54748a333-336c7b05-1764000-162fbe01756131a%22%7D; LG_LOGIN_USER_ID=2c307fca8f7021f2bcb48e09504f48685acb6eac181de81c; showExpriedIndex=1; showExpriedCompanyHome=1; showExpriedMyPublish=1; hasDeliver=0; JSESSIONID=ABAAABAABEEAAJAE07FBC7D4DF87B66F2D4043894D889A7; _putrc=6371D2A6DE42118C; login=true; 
mds_u_n=%5Cu8d1d%5Cu58f3%5Cu91d1%5Cu63a7%5Cu5317%5Cu4eac; mds_u_ci=159558; X_HTTP_TOKEN=ee02191ab39aeafe8221a97bdbc5d06f; Hm_lvt_4233e74dff0ae5bd0a3d81c6ccf756e6=1534149795,1534919704; yun_switch_company=1; _gid=GA1.2.236062443.1536480058; LGSID=20180909160058-7c6bb276-b406-11e8-b62b-5254005c3644; PRE_UTM=; PRE_HOST=; PRE_SITE=; PRE_LAND=https%3A%2F%2Fwww.lagou.com%2F; gate_login_token=cf101a24f676e9e598be30fb739271f3ff8e76bfbec59f83; mds_u_cn=%5Cu8d1d%5Cu58f3%5Cu91d1%5Cu79d1%5Cu63a7%5Cu80a1%5Cu6709%5Cu9650%5Cu516c%5Cu53f8; mds_u_s_cn=%5Cu8d1d%5Cu58f3%5Cu91d1%5Cu670d; unick=18800118876; yun_push_authToken="JZfh+Grg9GvCxIVJVfHfI8KjD/G0bxVo0HtuvLII5/Xm2NlGNg1UsHxxDJqQFc9fnGoyeBZvhUfuvY8Mto3upBWRPaYAk7heQlCWp63hWDaDJ2uyAFAq3DTK7j24YxX7e7g1RVQvFTZW5KbQFd0sMQxCv4X/4NwrwsS/v5aXU+x4rucJXOpldXhUiavxhcCELWDotJ+bmNVwmAvQCptcy5e7czUcjiQC32Lco44BMYXrQ+AIOfEccJKHpj0vJ+ngq/27aqj1hWq8tEPFFjdnxMSfKgAnjbIEAX3F9CIW8BSiMHYmPBt7FDDY0CCVFICHr2dp5gQVGvhfbqg7VzvNsw=="; mds_login_authToken="OLzLRoAc/eD6AABAewCWpbIJ/eu8qBHKDjhbdDnbV9JdlQrgjiZ+LWjOaziTjnZP77x9C5OlLk4DFgWJFQ8ekmw27Av1P+GjJ8WNV82JuqEuRpQKnHSmTWaLosUgWRvrYi7/C/KiuNlQWdVzDdRn2Wir0LqzdL/PqN28NmThSXd4rucJXOpldXhUiavxhcCELWDotJ+bmNVwmAvQCptcy5e7czUcjiQC32Lco44BMYXrQ+AIOfEccJKHpj0vJ+ngq/27aqj1hWq8tEPFFjdnxMSfKgAnjbIEAX3F9CIW8BSiMHYmPBt7FDDY0CCVFICHr2dp5gQVGvhfbqg7VzvNsw=="; index_location_city=%E5%85%A8%E5%9B%BD; TG-TRACK-CODE=search_code; SEARCH_ID=dce8f7fa57814b2ba25a330df122cce5; Hm_lpvt_4233e74dff0ae5bd0a3d81c6ccf756e6=1536480724; LGRID=20180909161204-09522e65-b408-11e8-b62b-5254005c3644; _ga=GA1.2.1690296475.1518425441; user_trace_token=20180212165041-cde58656-0fd1-11e8-8654-525400f775ce; LGUID=20180212165041-cde58a1b-0fd1-11e8-8654-525400f775ce; gray=resume; 
sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%228310232%22%2C%22%24device_id%22%3A%22162fbe017557-0454a54748a333-336c7b05-1764000-162fbe01756131a%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E7%9B%B4%E6%8E%A5%E6%B5%81%E9%87%8F%22%2C%22%24latest_referrer%22%3A%22%22%2C%22%24latest_referrer_host%22%3A%22%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC_%E7%9B%B4%E6%8E%A5%E6%89%93%E5%BC%80%22%7D%2C%22first_id%22%3A%22162fbe017557-0454a54748a333-336c7b05-1764000-162fbe01756131a%22%7D; showExpriedIndex=1; showExpriedCompanyHome=1; showExpriedMyPublish=1; hasDeliver=0; JSESSIONID=ABAAABAABEEAAJAE07FBC7D4DF87B66F2D4043894D889A7; mds_u_n=%5Cu8d1d%5Cu58f3%5Cu91d1%5Cu63a7%5Cu5317%5Cu4eac; mds_u_ci=159558; X_HTTP_TOKEN=ee02191ab39aeafe8221a97bdbc5d06f; Hm_lvt_4233e74dff0ae5bd0a3d81c6ccf756e6=1534149795,1534919704; yun_switch_company=1; _gid=GA1.2.236062443.1536480058; mds_u_cn=%5Cu8d1d%5Cu58f3%5Cu91d1%5Cu79d1%5Cu63a7%5Cu80a1%5Cu6709%5Cu9650%5Cu516c%5Cu53f8; mds_u_s_cn=%5Cu8d1d%5Cu58f3%5Cu91d1%5Cu670d; gate_login_token=21d2ec82a5c90746ab0a09e3014903592062af3c2a0e44a3; yun_push_authToken="K3X2PunJpOPcH6Jps7SH2GIRGCFgFXxWS73fruNGbe5BLuIDysbZl2SSsqWlCQOmGY8KGQl8UWoJ08WEvZtQHQd2R4sMO1Q6skl77olVWj4P6T4vls04NALTvIJxMxC4zd9CJ+eIEJBemWIFuPIzMIQenQ3GU870INQTDV8C2xh4rucJXOpldXhUiavxhcCELWDotJ+bmNVwmAvQCptcy5e7czUcjiQC32Lco44BMYXrQ+AIOfEccJKHpj0vJ+ngq/27aqj1hWq8tEPFFjdnxMSfKgAnjbIEAX3F9CIW8BSiMHYmPBt7FDDY0CCVFICHr2dp5gQVGvhfbqg7VzvNsw=="; login=false; unick=""; mds_login_authToken=""; _putrc=""; LG_LOGIN_USER_ID=""; index_location_city=%E5%8C%97%E4%BA%AC; TG-TRACK-CODE=index_search; _gat=1; LGSID=20180910134546-c3eaac0b-b4bc-11e8-b62b-5254005c3644; PRE_UTM=; PRE_HOST=; PRE_SITE=https%3A%2F%2Fwww.lagou.com%2F; PRE_LAND=https%3A%2F%2Fwww.lagou.com%2Fjobs%2Flist_python%3FlabelWords%3D%26fromSearch%3Dtrue%26suginput%3D; SEARCH_ID=957986ea2ff146639cd6b94a73323a94; Hm_lpvt_4233e74dff0ae5bd0a3d81c6ccf756e6=1536558710; 
LGRID=20180910135149-9c548934-b4bd-11e8-b62b-5254005c3644 197 | """ 198 | def parse(i): 199 | salary=i['salary'].replace('k','') 200 | s=salary.split('-') 201 | if len(s)==2: 202 | l_salary=salary.split('-')[0] 203 | u_salary=salary.split('-')[1] 204 | else: 205 | l_salary=u_salary=salary 206 | return i['companyShortName'], l_salary,u_salary, i['city'], i['workYear'] 207 | 208 | p = Pipe( 209 | product(["北京","杭州","上海","深圳"], range(1, 30)), 210 | Delay(1), 211 | lambda c_p: HttpRequest( 212 | request_headers=request_headers, 213 | url=f"https://www.lagou.com/jobs/positionAjax.json?px=default&city={c_p[0]}&needAddtionalResult=false", 214 | payload=f"first=false&pn={c_p[1]}&kd=python", 215 | 216 | ), # 构造请求 217 | HttpLoader(), 218 | lambda r: r.json['content']['positionResult']['result'], 219 | parse, 220 | 221 | AioFile("ex_output/lagou_python.csv") 222 | ) 223 | 224 | Bot.run() 225 | 226 | 227 | 函数也可以用lambda来写: 228 | lambda r: (r['companyShortName'], r['salary'], r['city'], r['workYear']), 229 | 230 | 231 | Delay操作的作用,使用限制速度。每秒请求一个网页。 232 | 233 | 234 | 打开本地保存的csv文件。就可以进行后续分析了。 235 | .. image:: lagou5.jpg 236 | 237 | 238 | -------------------------------------------------------------------------------- /docs/faq.rst: -------------------------------------------------------------------------------- 1 | Frequently Asked Questions 2 | ========================== 3 | 4 | .. contents:: 5 | :local: -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | 2 | 3 | Welcome to Botflow's world! 4 | =================================== 5 | 6 | 7 | 8 | 9 | The Botflow applicaton is made with one or many pipes and run by ```Bot.run()```. 10 | The simplest applicaton looks like: 11 | 12 | .. code-block:: python 13 | 14 | from botflow import * 15 | Pipe(print) 16 | Bot.run() 17 | 18 | 19 | Concept of the Botflow is very simple. 
20 | 21 | 22 | **Pipe** works at the Top level. It combines the Node and Route together. 23 | 24 | **Node** is callable: in python world, we have three callable things: 25 | 26 | - function 27 | - function object. Class with ```__call__``` function overridden 28 | - lambda 29 | 30 | 31 | 32 | **Route** is for duplicating data for multiple purposes. for simple applicaiton, you don't need route. 33 | Just a main pipe. 34 | 35 | case 1: when get a tick bitcoin price from api, 36 | you want save it to file and mysql at same time. 37 | 38 | .. code-block:: python 39 | 40 | Pipe( 41 | get_price, 42 | Branch(save_to_db), 43 | save_to_file 44 | ) 45 | 46 | 47 | 48 | 49 | 50 | 51 | case 2: crawler gets a google search result page. It may need to parse search result and next page link . 52 | 53 | 54 | .. code-block:: python 55 | 56 | Pipe( 57 | "https://www.google.com/search?q=kkyon+Botflow", 58 | HttpLoader(), 59 | Branch(parse_search_result,save_to_db), 60 | parse_all_page_url, 61 | HttpLoader(), 62 | parse_search_result, 63 | save_to_db 64 | ) 65 | 66 | 67 | The above two code blocks look like pseudo code, but they are workable samples. 68 | 69 | .. warning:: 70 | 71 | In this documentationm. Data, Message, Event are the same thing. 72 | 73 | 74 | .. toctree:: 75 | :maxdepth: 2 76 | :caption: Contents: 77 | 78 | bot/index 79 | pipe 80 | node 81 | route 82 | examples/index 83 | change/index 84 | 85 | .. toctree:: 86 | :maxdepth: 1 87 | 88 | faq 89 | 90 | 91 | 92 | 93 | 94 | Indices and tables 95 | ================== 96 | 97 | * :ref:`genindex` 98 | * :ref:`modindex` 99 | * :ref:`search` 100 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 
11 | set BUILDDIR=_build 12 | set SPHINXPROJ=databot 13 | 14 | if "%1" == "" goto help 15 | 16 | %SPHINXBUILD% >NUL 2>NUL 17 | if errorlevel 9009 ( 18 | echo. 19 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 20 | echo.installed, then set the SPHINXBUILD environment variable to point 21 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 22 | echo.may add the Sphinx directory to PATH. 23 | echo. 24 | echo.If you don't have Sphinx installed, grab it from 25 | echo.http://sphinx-doc.org/ 26 | exit /b 1 27 | ) 28 | 29 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 30 | goto end 31 | 32 | :help 33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 34 | 35 | :end 36 | popd 37 | -------------------------------------------------------------------------------- /docs/node.rst: -------------------------------------------------------------------------------- 1 | Node 2 | ==== 3 | 4 | .. contents:: 5 | :local: 6 | 7 | Node is callable thing .In python world ,we have three callable things , 8 | 9 | - function 10 | - function object. class with override ```__call__``` 11 | - lambada 12 | 13 | 14 | Major work of user of Botflow shouble be writing core logic function, 15 | for parsing, mapping,calculating , aggregating. It is the main purpose of Botflow desing. 16 | 17 | 18 | Pass into Node: 19 | --------------- 20 | Node must have only one parameter. the pass in value is from upflow node return value. 21 | 22 | 23 | Return from Node: 24 | ----------------- 25 | Node can return anything . list ,generator,raw value,tuple . 26 | 27 | .. attention:: 28 | 29 | - list will be unpacked into separate item . tuple will not be unpacked. 30 | - generator will be iterated by bot. so you can return a infinite generator to simulate a stream flow 31 | it is interested to test. 32 | - raw will put into queue. 
How to handle Exceptions
118 | 119 | Filter 120 | ----- 121 | 122 | Drop data from pipe if it does not match some condition 123 | 124 | 125 | -------------------------------------------------------------------------------- /docs/pipe.rst: -------------------------------------------------------------------------------- 1 | Pipe 2 | ==== 3 | .. toctree:: 4 | :maxdepth: 1 5 | 6 | application 7 | 8 | -------------------------------------------------------------------------------- /docs/route.rst: -------------------------------------------------------------------------------- 1 | Route 2 | ===== 3 | 4 | .. contents:: 5 | :local: 6 | 7 | Route is important concept of the Botflow. With it ,we can duplicate ,join,drop data. 8 | and route message to muliti target Node. 9 | 10 | ```def __init__(self, *args, route_type=object,route_func=None, share=True, join=False):``` 11 | 12 | 13 | :args: list of Node or Route. 14 | 15 | :share: True|False. default value *True* if keep the orignal data for upflow. 16 | 17 | :route_type: list of Type for route upflow message to Branch.default value *object* 18 | 19 | :route_func: a function handle for route. logic bewteen route_type and route_func work together with `and`. 20 | 21 | :join: True|False ,if Return the final message to parent Pipe. default value *False* 22 | 23 | 24 | 25 | Branch 26 | ------ 27 | most Basic route of the Botflow .it duplicate the parent from the parent pipe. 28 | 29 | 30 | Return 31 | ------ 32 | Route Return is derived from Branch with parameter (share=False,join=True) 33 | 34 | Branch,Return,Filter 35 | ==================== 36 | 37 | 38 | .. image:: Botflow_branch.jpg 39 | :width: 300 40 | 41 | 42 | 43 | :share: True|False. if keep the orignal data for parent pipe 44 | 45 | :route_type: list of Type for route upflow message to Branch 46 | 47 | :join: True|False ,if Return the final message to parent Pipe. 
48 | 49 | 50 | 51 | Filter 52 | ------ 53 | Route Filter is derived from Branch .it will drop out message ,if not match (route_type,route_func) 54 | sametime. 55 | 56 | 57 | Join 58 | ---- 59 | Join is derived from Fork with parameter(share=False,join=True) . 60 | 61 | 62 | SendTo 63 | ------ 64 | Send the stream to speicaled Node. 65 | -------------------------------------------------------------------------------- /docs/route/databot_branch.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kkyon/botflow/3c06afb8c9c8e0ac962bd2d157a79efcf05d913a/docs/route/databot_branch.jpg -------------------------------------------------------------------------------- /docs/route/databot_fork.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kkyon/botflow/3c06afb8c9c8e0ac962bd2d157a79efcf05d913a/docs/route/databot_fork.jpg -------------------------------------------------------------------------------- /docs/route/databot_join.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kkyon/botflow/3c06afb8c9c8e0ac962bd2d157a79efcf05d913a/docs/route/databot_join.jpg -------------------------------------------------------------------------------- /docs/route/databot_return.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kkyon/botflow/3c06afb8c9c8e0ac962bd2d157a79efcf05d913a/docs/route/databot_return.jpg -------------------------------------------------------------------------------- /examples/Zip.py: -------------------------------------------------------------------------------- 1 | import logging 2 | logging.basicConfig(level=logging.DEBUG) 3 | logging.getLogger('asyncio').setLevel(logging.DEBUG) 4 | from botflow import * 5 | 6 | p1=Pipe(lambda x:x+1) 7 | p2=Pipe(lambda x:x+2) 8 | 9 | p=Pipe( 10 | 11 | 
Zip(p1,p2) 12 | 13 | 14 | ) 15 | 16 | print(p.run(range(0,10))) 17 | #BotFlow.render("ex_output/test") 18 | # BotFlow.run() -------------------------------------------------------------------------------- /examples/aiohttpserver_hello.py: -------------------------------------------------------------------------------- 1 | import logging 2 | logging.basicConfig(level=logging.DEBUG) 3 | from botflow import * 4 | 5 | from aiohttp import web 6 | 7 | 8 | 9 | p = Pipe( 10 | 11 | {"msg":"hello world!"} 12 | ) 13 | 14 | 15 | 16 | app = web.Application() 17 | 18 | app.add_routes([ 19 | web.get('/', p.aiohttp_json_handle) 20 | ]) 21 | 22 | 23 | Bot.run_app(app,port=8081) 24 | #BotFlow start web server http://0.0.0.0:8080 25 | 26 | -------------------------------------------------------------------------------- /examples/aiohttpserver_search.py: -------------------------------------------------------------------------------- 1 | from botflow import * 2 | from botflow import config 3 | from bs4 import BeautifulSoup 4 | from aiohttp import web 5 | import aiohttp 6 | import logging 7 | 8 | logging.basicConfig(level=logging.DEBUG) 9 | 10 | config.exception_policy=config.Exception_pipein 11 | 12 | def parse_search(response): 13 | # raise Exception() 14 | soup = BeautifulSoup(response.text, "lxml") 15 | items = soup.find_all('a', href=True) 16 | result = [] 17 | for rank, item in enumerate(items): 18 | if len(item.get_text())>10 and 'http' in item['href']: 19 | r={'title':item.get_text(),'href':item['href']} 20 | result.append(r) 21 | return result 22 | 23 | 24 | p=Pipe( 25 | 26 | lambda r:r.query.get('q',''), 27 | Join( 28 | lambda q:"https://www.bing.com/search?q={}".format(q), 29 | lambda q:"https://www.google.com/search?q={}".format(q), 30 | lambda q:"https://www.baidu.com/s?wd={}".format(q), 31 | ), 32 | 33 | Zip(n_stream=3), 34 | HttpLoader(timeout=3), 35 | parse_search, 36 | ) 37 | 38 | routes = web.RouteTableDef() 39 | @routes.get('/') 40 | async def json_handle(request): 
41 | r = await p.run_async(request) 42 | return web.json_response(r) 43 | 44 | 45 | 46 | 47 | 48 | app = web.Application() 49 | #app.add_routes([web.get('/', json_handle)]) 50 | app.add_routes(routes) 51 | 52 | 53 | Bot.render('ex_output/httpserver') 54 | Bot.run_app(app) 55 | 56 | -------------------------------------------------------------------------------- /examples/aiohttpserver_websocket.py: -------------------------------------------------------------------------------- 1 | from botflow import * 2 | from botflow import config 3 | from bs4 import BeautifulSoup 4 | from aiohttp import web 5 | from botflow.node import Flat 6 | import aiohttp 7 | import logging 8 | 9 | logging.basicConfig(level=logging.DEBUG) 10 | 11 | 12 | # config.exception_policy=config.Exception_ignore 13 | 14 | def parse_search(response): 15 | # raise Exception() 16 | soup = BeautifulSoup(response.text, "lxml") 17 | items = soup.find_all('a', href=True) 18 | result = [] 19 | for rank, item in enumerate(items): 20 | if len(item.get_text()) > 10 and 'http' in item['href']: 21 | r = {'title': item.get_text(), 'href': item['href']} 22 | result.append(r) 23 | return result 24 | 25 | 26 | p = Pipe( 27 | 28 | lambda r: r.query['q'], 29 | Join( 30 | lambda q: "https://www.bing.com/search?q={}".format(q), 31 | lambda q: "https://www.google.com/search?q={}".format(q), 32 | lambda q: "https://www.baidu.com/s?wd={}".format(q), 33 | ), 34 | 35 | HttpLoader(), 36 | parse_search, 37 | Flat() 38 | ) 39 | 40 | 41 | 42 | 43 | async def websocket_handler(request): 44 | ws = web.WebSocketResponse() 45 | await ws.prepare(request) 46 | 47 | async for msg in ws: 48 | if msg.type == aiohttp.WSMsgType.TEXT: 49 | if msg.data == 'close': 50 | await ws.close() 51 | else: 52 | await p.write(msg.data) 53 | async for data in p.read(): 54 | await ws.send_str(data) 55 | 56 | 57 | elif msg.type == aiohttp.WSMsgType.ERROR: 58 | print('ws connection closed with exception %s' % 59 | ws.exception()) 60 | 61 | print('websocket 
connection closed') 62 | 63 | return ws 64 | 65 | 66 | app = web.Application() 67 | app.add_routes([ 68 | web.get('/ws', websocket_handler) 69 | ]) 70 | 71 | BotFlow.render('ex_output/httpserver') 72 | BotFlow.run_app(app) 73 | 74 | -------------------------------------------------------------------------------- /examples/amazon_book.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "from botflow import *\n", 12 | "\n", 13 | "p=Pipe(\n", 14 | " range(10),\n", 15 | " lambda p:f\"https://www.amazon.com/s/ref=sr_pg_{p}?fst=p90x%3A1&page={p}&rh=n%3A283155%2Ck%3Apython&keywords=python&ie=UTF8&qid=1536500367\",\n", 16 | " HttpLoader(),\n", 17 | "\n", 18 | " lambda r:r.soup.get_all_links()\n", 19 | "\n", 20 | ")\n", 21 | "\n", 22 | "p.run(0)\n" 23 | ] 24 | } 25 | ], 26 | "metadata": { 27 | "kernelspec": { 28 | "display_name": "Python 2", 29 | "language": "python", 30 | "name": "python2" 31 | }, 32 | "language_info": { 33 | "codemirror_mode": { 34 | "name": "ipython", 35 | "version": 2 36 | }, 37 | "file_extension": ".py", 38 | "mimetype": "text/x-python", 39 | "name": "python", 40 | "nbconvert_exporter": "python", 41 | "pygments_lexer": "ipython2", 42 | "version": "2.7.6" 43 | } 44 | }, 45 | "nbformat": 4, 46 | "nbformat_minor": 0 47 | } 48 | -------------------------------------------------------------------------------- /examples/amazon_book.py: -------------------------------------------------------------------------------- 1 | from botflow import * 2 | from botflow.config import config 3 | config.exception_policy=config.Exception_ignore 4 | 5 | 6 | Pipe( 7 | range(10), 8 | lambda p:f"https://www.amazon.com/s/ref=sr_pg_{p}?fst=p90x%3A1&page={p}&rh=n%3A283155%2Ck%3Apython&keywords=python&ie=UTF8&qid=1536500367", 9 | HttpLoader(), 10 | 11 | #lambda 
r:r.soup.get_all_links(), 12 | lambda r:r.soup.select("a h2"), 13 | lambda r:r.get_text(), 14 | AioFile("ex_output/amazon_book.csv") 15 | 16 | ) 17 | 18 | Bot.run() 19 | -------------------------------------------------------------------------------- /examples/async.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import aiomysql 3 | 4 | loop = asyncio.get_event_loop() 5 | 6 | def f(): 7 | pass 8 | async def d(): 9 | return 'ddd' 10 | 11 | async def e(): 12 | 13 | r=await d() 14 | return r 15 | 16 | async def g(): 17 | yield 'gg' 18 | 19 | 20 | async def example(): 21 | r=await e() 22 | print(r) 23 | 24 | 25 | loop.run_until_complete(example()) -------------------------------------------------------------------------------- /examples/backpressure.py: -------------------------------------------------------------------------------- 1 | 2 | from botflow import * 3 | import datetime 4 | import logging 5 | logger=logging.getLogger("botflow.queue") 6 | #logger.setLevel(logging.DEBUG) 7 | class SpeedTest: 8 | def __init__(self): 9 | 10 | self.count=0 11 | self.start_time = datetime.datetime.now() 12 | 13 | 14 | def __call__(self, data): 15 | self.count+=1 16 | if self.count%10000==0: 17 | 18 | end = datetime.datetime.now() 19 | s = (end - self.start_time).total_seconds() 20 | speed_now = self.count / s 21 | print(f"speed now {speed_now}") 22 | 23 | self.count = 0 24 | self.start_time = datetime.datetime.now() 25 | 26 | 27 | return data 28 | 29 | 30 | Pipe( 31 | range(100000000000), 32 | SpeedTest(), 33 | 34 | 35 | ) 36 | 37 | BotFlow.run() -------------------------------------------------------------------------------- /examples/baidu_spider.py: -------------------------------------------------------------------------------- 1 | from botflow import BotFlow,Pipe,HttpLoader,Branch,AioFile 2 | from bs4 import BeautifulSoup 3 | import logging 4 | logging.basicConfig(level=logging.DEBUG) 5 | 6 | 7 | 8 | #定义解析结构 9 | 
class ResultItem: 10 | 11 | def __init__(self): 12 | self.id: str = '' 13 | self.name: str = '' 14 | self.url: str = ' ' 15 | self.page_rank: int = 0 16 | self.page_no: int = 0 17 | 18 | def __repr__(self): 19 | return '%s,%s,%d,%d'%(str(self.id),self.name,self.page_no,self.page_rank) 20 | 21 | 22 | # 解析具体条目 23 | def get_all_items(response): 24 | soup = BeautifulSoup(response.text, "lxml") 25 | items = soup.select('div.result.c-container') 26 | #result = [] 27 | for rank, item in enumerate(items): 28 | import uuid 29 | id = uuid.uuid4() 30 | r = ResultItem() 31 | r.id = id 32 | r.page_rank = rank 33 | r.name = item.h3.get_text() 34 | yield r 35 | #result.append(r) 36 | #return result 37 | 38 | 39 | # 解析分页链接 40 | def get_all_page_url(response): 41 | itemList = [] 42 | soup = BeautifulSoup(response.text, "lxml") 43 | page = soup.select('div#page') 44 | for item in page[0].find_all('a'): 45 | href = item.get('href') 46 | no = item.get_text() 47 | if '下一页' in no: 48 | break 49 | yield 'https://www.baidu.com' + href 50 | 51 | # return itemList 52 | 53 | 54 | def main(): 55 | words = ['贸易战', '世界杯'] 56 | baidu_url = 'https://www.baidu.com/s?wd=%s' 57 | urls = [baidu_url % (word) for word in words] 58 | 59 | 60 | 61 | 62 | outputfile=AioFile('ex_output/baidu.txt') 63 | Pipe( 64 | urls, 65 | HttpLoader(), 66 | Branch(get_all_items,outputfile), 67 | Branch(get_all_page_url, HttpLoader(), get_all_items, outputfile), 68 | 69 | ) 70 | #生成流程图 71 | BotFlow.debug_print() 72 | BotFlow.render('ex_output/baiduspider') 73 | BotFlow.run() 74 | 75 | 76 | main() 77 | 78 | 79 | 80 | 81 | 82 | -------------------------------------------------------------------------------- /examples/baidu_spider_progress.py: -------------------------------------------------------------------------------- 1 | from botflow import Pipe, Branch, Timer 2 | from botflow import BotFlow 3 | from bs4 import BeautifulSoup 4 | from dataclasses import dataclass 5 | from botflow.ex.http import HttpLoader 6 | from 
botflow.config import config 7 | 8 | 9 | @dataclass 10 | class ResultItem: 11 | id: str = '' 12 | name: str = '' 13 | url: str = ' ' 14 | page_rank: int = 0 15 | page_no: int = 0 16 | 17 | def __repr__(self): 18 | return self.name 19 | 20 | 21 | @dataclass 22 | class UrlItem: 23 | name: str 24 | url: str 25 | 26 | 27 | # 解析具体条目 28 | def get_all_items(response): 29 | soup = BeautifulSoup(response.text, "lxml") 30 | items = soup.select('div.result.c-container') 31 | result = [] 32 | for rank, item in enumerate(items): 33 | import uuid 34 | id = uuid.uuid4() 35 | r = ResultItem() 36 | r.id = id 37 | r.page_rank = rank 38 | r.name = item.h3.get_text() 39 | yield r 40 | 41 | 42 | 43 | # 解析 分页 链接 44 | def get_all_page_url(response): 45 | itemList = [] 46 | #BD_URL='https://180.97.33.108' # 47 | BD_URL='https://www.baidu.com' 48 | soup = BeautifulSoup(response.text, "lxml") 49 | page = soup.select('div#page') 50 | for item in page[0].find_all('a'): 51 | href = item.get('href') 52 | no = item.get_text() 53 | if '下一页' in no: 54 | break 55 | yield BD_URL + href 56 | 57 | 58 | 59 | 60 | result = [] 61 | 62 | delay=5 63 | def collect(i): 64 | result.append(i) 65 | 66 | 67 | def show_progress(count): 68 | n=len(result) 69 | speed=n/(count*delay) 70 | print('got len item %s speed:%03f per second,total cost: %ss'%(n,speed,count*delay)) 71 | 72 | 73 | 74 | config.exception_policy=config.Exception_ignore 75 | def main(): 76 | words = ['贸易战', '世界杯']*50 77 | baidu_url = 'https://www.baidu.com/s?wd=%s' 78 | urls = [baidu_url % (word) for word in words] 79 | 80 | # make data flow net 81 | p1=Pipe( 82 | urls, 83 | HttpLoader(), 84 | Branch(get_all_items, collect), 85 | Branch(get_all_page_url, HttpLoader(), get_all_items, collect), 86 | 87 | ) 88 | Pipe(Timer(delay=delay), show_progress) 89 | BotFlow.run(silent=True) 90 | 91 | 92 | main() 93 | 94 | # 95 | # ---run result---- 96 | #post man test result for a page requrest ;1100ms 97 | # 98 | # PING www.a.shifen.com (180.97.33.108): 56 
data bytes 99 | # 64 bytes from 180.97.33.108: icmp_seq=0 ttl=55 time=41.159 ms 100 | 101 | # got len item 9274 speed:52.994286 per second,total cost: 175s 102 | # got len item 9543 speed:53.016667 per second,total cost: 180s 103 | # got len item 9614 speed:51.967568 per second,total cost: 185s 104 | 105 | 106 | #best test data 107 | 108 | #25 pages per seconde. 109 | # got len item 1540 speed:102.666667 per second,total cost: 15s 110 | # got len item 2549 speed:127.450000 per second,total cost: 20s 111 | # got len item 3450 speed:138.000000 per second,total cost: 25s 112 | # got len item 4843 speed:161.433333 per second,total cost: 30s 113 | # got len item 6070 speed:173.428571 per second,total cost: 35s 114 | # got len item 6826 speed:170.650000 per second,total cost: 40s 115 | # got len item 7773 speed:172.733333 per second,total cost: 45s 116 | # got len item 8681 speed:173.620000 per second,total cost: 50s 117 | # got len item 9700 speed:176.363636 per second,total cost: 55s 118 | 119 | 120 | 121 | 122 | 123 | 124 | -------------------------------------------------------------------------------- /examples/basic.py: -------------------------------------------------------------------------------- 1 | from botflow import * 2 | 3 | p = Pipe( 4 | range(3), 5 | lambda 6 | p: f"https://www.amazon.com/s/ref=sr_pg_{p}?fst=p90x%3A1&page={p}&rh=n%3A283155%2Ck%3Apython&keywords=python&ie=UTF8&qid=1536500367", 7 | HttpLoader(), 8 | 9 | lambda r: r.soup.find_all("li"), 10 | Flat() 11 | ) 12 | 13 | links = p.run(0) 14 | 15 | print(links) 16 | #li=Pipe(links).Flat().run() -------------------------------------------------------------------------------- /examples/bitcoin_ticker.py: -------------------------------------------------------------------------------- 1 | from botflow import Pipe, Join, Return,Timer,Zip 2 | from botflow import BotFlow 3 | from botflow.ex.http import HttpLoader 4 | import time 5 | import datetime 6 | from botflow.config import config 7 | from 
botflow.node import print_list 8 | class Tick(object): 9 | 10 | 11 | def __init__(self): 12 | self.ask=None 13 | self.bid=None 14 | self.exchange='' 15 | self.time=None 16 | def __repr__(self): 17 | 18 | st = datetime.datetime.fromtimestamp(self.time).strftime('%Y-%m-%d %H:%M:%S') 19 | return "{} {} ask:{} bid:{}".format(self.exchange,st,self.ask,self.bid) 20 | 21 | def parse_kraken(response): 22 | json=response.json 23 | t=Tick() 24 | t.exchange='kraken' 25 | t.bid=json['result']['XXBTZUSD']['b'][0] 26 | t.ask = json['result']['XXBTZUSD']['a'][0] 27 | t.time=time.time() 28 | return t 29 | 30 | def parse_bittrex(response): 31 | json=response.json 32 | t=Tick() 33 | t.exchange='bittrex' 34 | t.bid=json['result']['Bid'] 35 | t.ask = json['result']['Ask'] 36 | t.time=time.time() 37 | return t 38 | 39 | def parse_bitstamp(response): 40 | json=response.json 41 | t=Tick() 42 | t.exchange='bitstamp' 43 | t.bid=float(json['bid']) 44 | t.ask=float(json['ask']) 45 | t.time=time.time() 46 | return t 47 | 48 | #https://api.bitfinex.com/v1/ticker/btcusd 49 | def parse_bitfinex(response): 50 | json=response.json 51 | t=Tick() 52 | t.exchange='bitfinex' 53 | t.bid=float(json['bid']) 54 | t.ask=float(json['ask']) 55 | t.time=time.time() 56 | return t 57 | #https://bitpay.com/api/rates 58 | def parse_bitpay(response): 59 | json=response.json 60 | t=Tick() 61 | t.exchange='bitpay' 62 | for p in json: 63 | if p['code']=='USD': 64 | t.bid=p['rate'] 65 | t.ask=t.bid 66 | t.time=time.time() 67 | 68 | return t 69 | #http://api.coindesk.com/v1/bpi/currentprice.json 70 | 71 | def parse_coindesk(response): 72 | json=response.json 73 | t=Tick() 74 | t.exchange='coindesk' 75 | t.bid = json['bpi']['USD']['rate_float'] 76 | t.ask = t.bid 77 | t.time = time.time() 78 | return t 79 | 80 | config.exception_policy=config.Exception_pipein 81 | 82 | def main(): 83 | 84 | httpload=HttpLoader(timeout=2) 85 | Pipe( 86 | 87 | Timer(delay=2,max_time=5), 88 | Join( 89 | 
Return("https://api.kraken.com/0/public/Ticker?pair=XBTUSD",httpload , parse_kraken), 90 | Return("https://bittrex.com/api/v1.1/public/getticker?market=USD-BTC", httpload, parse_bittrex), 91 | Return("https://www.bitstamp.net/api/ticker/", httpload, parse_bitstamp), 92 | Return("https://api.bitfinex.com/v1/ticker/btcusd", httpload, parse_bitfinex), 93 | Return("https://bitpay.com/api/rates", httpload, parse_bitpay), 94 | Return("http://api.coindesk.com/v1/bpi/currentprice.json", httpload, parse_coindesk), 95 | 96 | 97 | 98 | ), 99 | 100 | Zip(n_stream=6), 101 | print_list 102 | 103 | ) 104 | 105 | BotFlow.render('ex_output/bitcoin_arbitrage') 106 | BotFlow.run() 107 | 108 | 109 | 110 | main() 111 | -------------------------------------------------------------------------------- /examples/bitcoin_ticker_stream.py: -------------------------------------------------------------------------------- 1 | from botflow import Pipe, Join, Return,Timer,Branch 2 | from botflow import BotFlow 3 | from botflow.ex.http import HttpLoader 4 | 5 | import time 6 | import datetime 7 | from botflow.config import config 8 | 9 | 10 | class Tick(object): 11 | 12 | 13 | def __init__(self): 14 | self.ask=None 15 | self.bid=None 16 | self.exchange='' 17 | self.time=None 18 | def __repr__(self): 19 | st = datetime.datetime.fromtimestamp(self.time).strftime('%Y-%m-%d %H:%M:%S') 20 | return "{} {} ask:{} bid:{}".format(self.exchange,st,self.ask,self.bid) 21 | 22 | def parse_kraken(response): 23 | json=response.json 24 | t=Tick() 25 | t.exchange='kraken' 26 | t.bid=json['result']['XXBTZUSD']['b'][0] 27 | t.ask = json['result']['XXBTZUSD']['a'][0] 28 | t.time=time.time() 29 | return t 30 | 31 | def parse_bittrex(response): 32 | json=response.json 33 | t=Tick() 34 | t.exchange='bittrex' 35 | t.bid=json['result']['Bid'] 36 | t.ask = json['result']['Ask'] 37 | t.time=time.time() 38 | return t 39 | 40 | 41 | 42 | config.exception_policy=config.Exception_ignore 43 | 44 | def print_list(d:list): 45 | 
print(d) 46 | return d 47 | 48 | def main(): 49 | 50 | 51 | hget=HttpLoader(timeout=2) 52 | 53 | Pipe( 54 | 55 | Timer(delay=3,max_time=5), 56 | 57 | Join( 58 | Return("https://api.kraken.com/0/public/Ticker?pair=XBTUSD", hget, parse_kraken), 59 | Return("https://bittrex.com/api/v1.1/public/getticker?market=USD-BTC", hget, parse_bittrex), 60 | 61 | ), 62 | print, 63 | 64 | ) 65 | 66 | BotFlow.render('ex_output/bitcoin_arbitrage') 67 | BotFlow.run() 68 | 69 | 70 | 71 | main() 72 | -------------------------------------------------------------------------------- /examples/boosttype.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | logging.basicConfig(level=logging.DEBUG) 4 | from botflow.config import config 5 | config.default_queue_max_size=0 6 | 7 | logging.debug(config) 8 | from botflow import Pipe,Timer,Branch 9 | from botflow import BotFlow 10 | from botflow.node import Node 11 | import time 12 | 13 | #it will block whole main thread 14 | 15 | @Node.boost 16 | def very_slow(a): 17 | print('i am going to sleep') 18 | time.sleep(10) 19 | print('i am aweek') 20 | 21 | 22 | 23 | def main(): 24 | 25 | 26 | Pipe( 27 | Timer(delay=1,max_time=10), 28 | Branch(very_slow), 29 | print, 30 | 31 | ) 32 | 33 | BotFlow.run() 34 | 35 | 36 | main() 37 | -------------------------------------------------------------------------------- /examples/chainable.py: -------------------------------------------------------------------------------- 1 | import logging 2 | logging.basicConfig(level=logging.DEBUG) 3 | from botflow import * 4 | from botflow.config import config 5 | config.exception_policy=config.Exception_pipein 6 | 7 | p=Pipe( 8 | range(1), 9 | lambda p:f"https://www.amazon.com/s/ref=sr_pg_{p}?fst=p90x%3A1&page={p}&rh=n%3A283155%2Ck%3Apython&keywords=python&ie=UTF8&qid=1536500367", 10 | HttpLoader(), 11 | 12 | lambda r:r.soup.find_all("li"), 13 | Flat() 14 | ) 15 | 16 | links=p.run(0) 17 | print(len(links)) 18 | 
p_get_img_src=Pipe(lambda t:t.select("a img"),lambda t:t['src']) 19 | 20 | p_get_title=Pipe(lambda t:t.select("a h2"),lambda t:t.get_text()) 21 | p_get_price=Pipe(lambda t:t.select("a > span.a-offscreen"),lambda t:t.get_text()) 22 | 23 | item_parse=Pipe(Zip(p_get_price,p_get_title,p_get_img_src)).Filter(lambda i : i[0]) 24 | 25 | 26 | 27 | 28 | r=item_parse.run(links) 29 | print(r) 30 | print(len(r)) -------------------------------------------------------------------------------- /examples/crawler.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from botflow import * 3 | from botflow.route import Link 4 | from botflow.config import config 5 | import datetime 6 | 7 | 8 | 9 | config.default_queue_max_size = 0 10 | # logging.basicConfig(level=logging.DEBUG) 11 | logger = logging.getLogger(__name__) 12 | 13 | 14 | start = datetime.datetime.now() 15 | 16 | seen = set() 17 | 18 | count = 1 19 | 20 | 21 | def print_speed(): 22 | end = datetime.datetime.now() 23 | s = (end - start).total_seconds() 24 | print(f"count {count} time {s} speed{count/s}") 25 | # QueueManager().debug_print() 26 | 27 | 28 | def filter_out(url): 29 | global count 30 | if 'http' not in url: 31 | url = "http://127.0.0.1:8080{}".format(url) 32 | 33 | if url in seen: 34 | return None 35 | 36 | count += 1 37 | 38 | if count % 5000 == 0: 39 | print_speed() 40 | 41 | seen.add(url) 42 | return url 43 | 44 | 45 | def find_all_links(r): 46 | for a in r.soup.find_all('a', href=True): 47 | yield a.get('href') 48 | 49 | 50 | b = Pipe( 51 | 52 | filter_out, 53 | HttpLoader(), 54 | find_all_links, 55 | ) 56 | 57 | Pipe( 58 | "http://127.0.0.1:8080/", 59 | b, 60 | Link(b), 61 | 62 | ) 63 | Bot.render('ex_output/crawler') 64 | 65 | try: 66 | 67 | BotFlow.debug_print() 68 | BotFlow.run() 69 | 70 | except KeyboardInterrupt: 71 | 72 | BotFlow.debug_print() 73 | 74 | except: 75 | raise 76 | BotFlow.debug_print() 77 | print_speed() 78 | BotFlow.stop() 79 | 
-------------------------------------------------------------------------------- /examples/ex_output/README.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kkyon/botflow/3c06afb8c9c8e0ac962bd2d157a79efcf05d913a/examples/ex_output/README.rst -------------------------------------------------------------------------------- /examples/lagou_crawler.py: -------------------------------------------------------------------------------- 1 | from botflow import * 2 | from botflow import HttpRequest 3 | from itertools import product 4 | 5 | 6 | 7 | request_headers=""" 8 | POST /jobs/positionAjax.json?city=%E5%8C%97%E4%BA%AC&needAddtionalResult=false HTTP/1.1 9 | Host: www.lagou.com 10 | Connection: keep-alive 11 | Content-Length: 26 12 | Pragma: no-cache 13 | Cache-Control: no-cache 14 | Origin: https://www.lagou.com 15 | X-Anit-Forge-Code: 0 16 | User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36 17 | Content-Type: application/x-www-form-urlencoded; charset=UTF-8 18 | Accept: application/json, text/javascript, */*; q=0.01 19 | X-Requested-With: XMLHttpRequest 20 | X-Anit-Forge-Token: None 21 | Referer: https://www.lagou.com/jobs/list_python?labelWords=&fromSearch=true&suginput= 22 | Accept-Encoding: gzip, deflate, br 23 | Accept-Language: zh-CN,zh;q=0.9,en;q=0.8,zh-TW;q=0.7 24 | Cookie: _ga=GA1.2.1690296475.1518425441; user_trace_token=20180212165041-cde58656-0fd1-11e8-8654-525400f775ce; LGUID=20180212165041-cde58a1b-0fd1-11e8-8654-525400f775ce; gray=resume; 
sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%228310232%22%2C%22%24device_id%22%3A%22162fbe017557-0454a54748a333-336c7b05-1764000-162fbe01756131a%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E7%9B%B4%E6%8E%A5%E6%B5%81%E9%87%8F%22%2C%22%24latest_referrer%22%3A%22%22%2C%22%24latest_referrer_host%22%3A%22%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC_%E7%9B%B4%E6%8E%A5%E6%89%93%E5%BC%80%22%7D%2C%22first_id%22%3A%22162fbe017557-0454a54748a333-336c7b05-1764000-162fbe01756131a%22%7D; LG_LOGIN_USER_ID=2c307fca8f7021f2bcb48e09504f48685acb6eac181de81c; showExpriedIndex=1; showExpriedCompanyHome=1; showExpriedMyPublish=1; hasDeliver=0; JSESSIONID=ABAAABAABEEAAJAE07FBC7D4DF87B66F2D4043894D889A7; _putrc=6371D2A6DE42118C; login=true; mds_u_n=%5Cu8d1d%5Cu58f3%5Cu91d1%5Cu63a7%5Cu5317%5Cu4eac; mds_u_ci=159558; X_HTTP_TOKEN=ee02191ab39aeafe8221a97bdbc5d06f; Hm_lvt_4233e74dff0ae5bd0a3d81c6ccf756e6=1534149795,1534919704; yun_switch_company=1; _gid=GA1.2.236062443.1536480058; LGSID=20180909160058-7c6bb276-b406-11e8-b62b-5254005c3644; PRE_UTM=; PRE_HOST=; PRE_SITE=; PRE_LAND=https%3A%2F%2Fwww.lagou.com%2F; gate_login_token=cf101a24f676e9e598be30fb739271f3ff8e76bfbec59f83; mds_u_cn=%5Cu8d1d%5Cu58f3%5Cu91d1%5Cu79d1%5Cu63a7%5Cu80a1%5Cu6709%5Cu9650%5Cu516c%5Cu53f8; mds_u_s_cn=%5Cu8d1d%5Cu58f3%5Cu91d1%5Cu670d; unick=18800118876; yun_push_authToken="JZfh+Grg9GvCxIVJVfHfI8KjD/G0bxVo0HtuvLII5/Xm2NlGNg1UsHxxDJqQFc9fnGoyeBZvhUfuvY8Mto3upBWRPaYAk7heQlCWp63hWDaDJ2uyAFAq3DTK7j24YxX7e7g1RVQvFTZW5KbQFd0sMQxCv4X/4NwrwsS/v5aXU+x4rucJXOpldXhUiavxhcCELWDotJ+bmNVwmAvQCptcy5e7czUcjiQC32Lco44BMYXrQ+AIOfEccJKHpj0vJ+ngq/27aqj1hWq8tEPFFjdnxMSfKgAnjbIEAX3F9CIW8BSiMHYmPBt7FDDY0CCVFICHr2dp5gQVGvhfbqg7VzvNsw=="; 
mds_login_authToken="OLzLRoAc/eD6AABAewCWpbIJ/eu8qBHKDjhbdDnbV9JdlQrgjiZ+LWjOaziTjnZP77x9C5OlLk4DFgWJFQ8ekmw27Av1P+GjJ8WNV82JuqEuRpQKnHSmTWaLosUgWRvrYi7/C/KiuNlQWdVzDdRn2Wir0LqzdL/PqN28NmThSXd4rucJXOpldXhUiavxhcCELWDotJ+bmNVwmAvQCptcy5e7czUcjiQC32Lco44BMYXrQ+AIOfEccJKHpj0vJ+ngq/27aqj1hWq8tEPFFjdnxMSfKgAnjbIEAX3F9CIW8BSiMHYmPBt7FDDY0CCVFICHr2dp5gQVGvhfbqg7VzvNsw=="; index_location_city=%E5%85%A8%E5%9B%BD; TG-TRACK-CODE=search_code; SEARCH_ID=dce8f7fa57814b2ba25a330df122cce5; Hm_lpvt_4233e74dff0ae5bd0a3d81c6ccf756e6=1536480724; LGRID=20180909161204-09522e65-b408-11e8-b62b-5254005c3644; _ga=GA1.2.1690296475.1518425441; user_trace_token=20180212165041-cde58656-0fd1-11e8-8654-525400f775ce; LGUID=20180212165041-cde58a1b-0fd1-11e8-8654-525400f775ce; gray=resume; sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%228310232%22%2C%22%24device_id%22%3A%22162fbe017557-0454a54748a333-336c7b05-1764000-162fbe01756131a%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E7%9B%B4%E6%8E%A5%E6%B5%81%E9%87%8F%22%2C%22%24latest_referrer%22%3A%22%22%2C%22%24latest_referrer_host%22%3A%22%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC_%E7%9B%B4%E6%8E%A5%E6%89%93%E5%BC%80%22%7D%2C%22first_id%22%3A%22162fbe017557-0454a54748a333-336c7b05-1764000-162fbe01756131a%22%7D; showExpriedIndex=1; showExpriedCompanyHome=1; showExpriedMyPublish=1; hasDeliver=0; JSESSIONID=ABAAABAABEEAAJAE07FBC7D4DF87B66F2D4043894D889A7; mds_u_n=%5Cu8d1d%5Cu58f3%5Cu91d1%5Cu63a7%5Cu5317%5Cu4eac; mds_u_ci=159558; X_HTTP_TOKEN=ee02191ab39aeafe8221a97bdbc5d06f; Hm_lvt_4233e74dff0ae5bd0a3d81c6ccf756e6=1534149795,1534919704; yun_switch_company=1; _gid=GA1.2.236062443.1536480058; mds_u_cn=%5Cu8d1d%5Cu58f3%5Cu91d1%5Cu79d1%5Cu63a7%5Cu80a1%5Cu6709%5Cu9650%5Cu516c%5Cu53f8; mds_u_s_cn=%5Cu8d1d%5Cu58f3%5Cu91d1%5Cu670d; gate_login_token=21d2ec82a5c90746ab0a09e3014903592062af3c2a0e44a3; 
yun_push_authToken="K3X2PunJpOPcH6Jps7SH2GIRGCFgFXxWS73fruNGbe5BLuIDysbZl2SSsqWlCQOmGY8KGQl8UWoJ08WEvZtQHQd2R4sMO1Q6skl77olVWj4P6T4vls04NALTvIJxMxC4zd9CJ+eIEJBemWIFuPIzMIQenQ3GU870INQTDV8C2xh4rucJXOpldXhUiavxhcCELWDotJ+bmNVwmAvQCptcy5e7czUcjiQC32Lco44BMYXrQ+AIOfEccJKHpj0vJ+ngq/27aqj1hWq8tEPFFjdnxMSfKgAnjbIEAX3F9CIW8BSiMHYmPBt7FDDY0CCVFICHr2dp5gQVGvhfbqg7VzvNsw=="; login=false; unick=""; mds_login_authToken=""; _putrc=""; LG_LOGIN_USER_ID=""; index_location_city=%E5%8C%97%E4%BA%AC; TG-TRACK-CODE=index_search; _gat=1; LGSID=20180910134546-c3eaac0b-b4bc-11e8-b62b-5254005c3644; PRE_UTM=; PRE_HOST=; PRE_SITE=https%3A%2F%2Fwww.lagou.com%2F; PRE_LAND=https%3A%2F%2Fwww.lagou.com%2Fjobs%2Flist_python%3FlabelWords%3D%26fromSearch%3Dtrue%26suginput%3D; SEARCH_ID=957986ea2ff146639cd6b94a73323a94; Hm_lpvt_4233e74dff0ae5bd0a3d81c6ccf756e6=1536558710; LGRID=20180910135149-9c548934-b4bd-11e8-b62b-5254005c3644 25 | """ 26 | def parse(i): 27 | salary=i['salary'].replace('k','') 28 | s=salary.split('-') 29 | if len(s)==2: 30 | l_salary=salary.split('-')[0] 31 | u_salary=salary.split('-')[1] 32 | else: 33 | l_salary=u_salary=salary 34 | return i['companyShortName'], l_salary,u_salary, i['city'], i['workYear'] 35 | 36 | p = Pipe( 37 | product(["北京","杭州","上海","深圳"], range(1, 30)), 38 | Delay(1), 39 | lambda c_p: HttpRequest( 40 | request_headers=request_headers, 41 | url=f"https://www.lagou.com/jobs/positionAjax.json?px=default&city={c_p[0]}&needAddtionalResult=false", 42 | payload=f"first=false&pn={c_p[1]}&kd=python", 43 | 44 | ), # 构造请求 45 | HttpLoader(), 46 | lambda r: r.json['content']['positionResult']['result'], 47 | parse, 48 | #lambda r: (r['companyShortName'], r['salary'], r['city'], r['workYear']), 49 | AioFile("ex_output/lagou_python.csv") 50 | ) 51 | 52 | Bot.run() 53 | -------------------------------------------------------------------------------- /examples/loop3.py: -------------------------------------------------------------------------------- 1 | from botflow import 
Pipe,Branch,BotFlow 2 | from botflow.botframe import BotFrame 3 | class Sum(object): 4 | def __init__(self): 5 | self.sum=0 6 | def __call__(self, i): 7 | self.sum+=i 8 | return self.sum 9 | def __repr__(self): 10 | return 'sum:'+str(self.sum) 11 | 12 | op_sum=Sum() 13 | 14 | def main(): 15 | Pipe( 16 | range(10), 17 | range(10), 18 | Branch(op_sum,print) 19 | ) 20 | 21 | BotFlow.run() 22 | print(op_sum) 23 | main() 24 | 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /examples/perf_test/aiohttpserver.py: -------------------------------------------------------------------------------- 1 | from botflow import * 2 | from aiohttp import web 3 | 4 | 5 | 6 | p = Pipe( 7 | 8 | {"msg":"hello world!"} 9 | ) 10 | 11 | 12 | 13 | app = web.Application() 14 | 15 | # routes = web.RouteTableDef() 16 | # 17 | # @routes.get('/hello') 18 | # def pipe_json_wrap(p): 19 | # async def _wrap(request): 20 | # r=await p(request) 21 | # return web.json_response(r) 22 | # 23 | # return _wrap 24 | # 25 | # 26 | app.add_routes([ 27 | web.get('/', p.aiohttp_json_handle()) 28 | ]) 29 | 30 | 31 | BotFlow.run_app(app) 32 | 33 | -------------------------------------------------------------------------------- /examples/perf_test/httpclinet.py: -------------------------------------------------------------------------------- 1 | from botflow import * 2 | import datetime 3 | import time 4 | 5 | count=0 6 | def check_and_count(i): 7 | global count 8 | count+=1 9 | 10 | 11 | # docker run -p 80:80 kennethreitz/httpbin 12 | Pipe( 13 | range(10000), 14 | "http://127.0.0.1:80/get", 15 | HttpLoader(), 16 | check_and_count 17 | 18 | 19 | ) 20 | 21 | start = datetime.datetime.now() 22 | BotFlow.run() 23 | 24 | end = datetime.datetime.now() 25 | 26 | print(end-start) 27 | print("count %d",count) 28 | 29 | time.sleep(100) -------------------------------------------------------------------------------- /examples/replayex.py: 
-------------------------------------------------------------------------------- 1 | from botflow import Pipe, Loop, Fork 2 | from botflow.botframe import BotFrame 3 | from botflow.config import config 4 | import time 5 | 6 | def double(a): 7 | print('double %d'%a) 8 | time.sleep(1) 9 | return 2*a 10 | count=0 11 | def triple(a): 12 | global count 13 | count+=1 14 | if count>6: 15 | raise Exception() 16 | pass 17 | return 3*a 18 | 19 | # 20 | config.replay_mode=True 21 | 22 | def main(): 23 | Pipe( 24 | 25 | Loop(range(10)), 26 | double, 27 | triple, 28 | print 29 | 30 | 31 | ) 32 | 33 | BotFrame.run() 34 | 35 | 36 | 37 | main() 38 | -------------------------------------------------------------------------------- /examples/requirements.txt: -------------------------------------------------------------------------------- 1 | beautifulsoup4 2 | lxml 3 | -------------------------------------------------------------------------------- /examples/simple_bitcoin_price.py: -------------------------------------------------------------------------------- 1 | from botflow import * 2 | 3 | 4 | def main(): 5 | Pipe( 6 | 7 | Timer(delay=2,max_time=10), # send timer data to pipe every 2 seconds 8 | "http://api.coindesk.com/v1/bpi/currentprice.json", # send url to pipe when timer trigger 9 | HttpLoader(), # read url and load http response 10 | lambda r: r.json['bpi']['USD']['rate_float'], # read http response and parse as json 11 | print, # print out 12 | 13 | ) 14 | 15 | Bot.render('ex_output/simple_bitcoin_price') 16 | Bot.run() 17 | # main() 18 | print('-----chian style----') 19 | from botflow import * 20 | 21 | p_cd_bitcoin = Pipe().Timer(delay=2,max_time=10).Loop("http://api.coindesk.com/v1/bpi/currentprice.json") \ 22 | .HttpLoader().Map(lambda r: r.json['bpi']['USD']['rate_float']).Map(print) 23 | 24 | p_cd_bitcoin.run() 25 | print('--run twinice---') 26 | p_cd_bitcoin.run() -------------------------------------------------------------------------------- 
/examples/zip_join.py: -------------------------------------------------------------------------------- 1 | from botflow import Pipe,Join 2 | from botflow.node import Zip 3 | from botflow import BotFlow 4 | 5 | 6 | 7 | def double(i): 8 | return i*2 9 | 10 | def triple(i): 11 | return i*3 12 | 13 | def plus_one(i): 14 | return i+1 15 | 16 | def print_out(m:list): 17 | print(m) 18 | 19 | def main(): 20 | Pipe( 21 | 22 | range(10), 23 | Join( 24 | double, 25 | triple 26 | ), 27 | plus_one, 28 | plus_one, 29 | 30 | 31 | Zip(n_stream=2), 32 | print_out, 33 | 34 | ) 35 | 36 | BotFlow.render('ex_output/blockedjoin') 37 | BotFlow.run() 38 | 39 | 40 | 41 | main() 42 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | aiohttp 2 | beautifulsoup4 3 | lxml 4 | graphviz -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from setuptools import setup 3 | 4 | import codecs 5 | 6 | 7 | def read(fname): 8 | return codecs.open(os.path.join(os.path.dirname(__file__), fname),encoding="utf-8").read() 9 | 10 | 11 | setup(name='botflow', 12 | description='Data-driven and Reactive programming framework' 13 | ' ', 14 | long_description=read("README.rst"), 15 | version='0.2.0', 16 | url='https://github.com/kkyx/botflow', 17 | author='Guojian Li', 18 | author_email='guojianlee@gmail.com', 19 | license='BSD', 20 | python_requires=">=3.6.5", 21 | classifiers=[ 22 | 'Development Status :: 3 - Alpha', 23 | 'Intended Audience :: Developers', 24 | 'License :: OSI Approved :: BSD License', 25 | 'Programming Language :: Python :: 3' 26 | ], 27 | packages=['botflow', 'botflow.ex'], 28 | install_requires=[ 29 | 'aiohttp>=3.3.0', 30 | 'graphviz', 31 | 'beautifulsoup4', 32 | 'lxml' 33 | 34 | 35 | ], 36 | 37 | ) 38 | 
-------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kkyon/botflow/3c06afb8c9c8e0ac962bd2d157a79efcf05d913a/tests/__init__.py -------------------------------------------------------------------------------- /tests/objet_ini.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | class Config: 4 | def __init__(self): 5 | self._max_size=128 6 | 7 | @property 8 | def max_size(self): 9 | print('get max %s',self._max_size) 10 | return self._max_size 11 | 12 | @max_size.setter 13 | def max_size(self,v): 14 | 15 | self._max_size=v 16 | print('set max %s', self._max_size) 17 | 18 | config=Config() 19 | 20 | 21 | def init(max_size=config.max_size): 22 | print("max_size %s",max_size) 23 | 24 | 25 | class Queue(object): 26 | def __init__(self,max_size=config.max_size): 27 | print("Queue max_size %s", max_size) 28 | 29 | 30 | config.max_size=0 31 | 32 | init() 33 | Queue() -------------------------------------------------------------------------------- /tests/setup.cfg.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kkyon/botflow/3c06afb8c9c8e0ac962bd2d157a79efcf05d913a/tests/setup.cfg.py -------------------------------------------------------------------------------- /tests/test_backpressure.py.bak: -------------------------------------------------------------------------------- 1 | from botflow import * 2 | from botflow.function import SpeedLimit 3 | from botflow.config import config 4 | import logging 5 | logger=logging.getLogger("botflow.queue") 6 | logger.setLevel(logging.DEBUG) 7 | sum=0 8 | 9 | def sum_up(i): 10 | 11 | global sum 12 | sum+=i 13 | return i 14 | 15 | 16 | def test_sum(): 17 | target=config.default_queue_max_size*3 18 | Bot.reset() 19 | Pipe( 20 | range(target), 21 | sum_up, 22 | ) 23 | 
24 | 25 | Bot.run() 26 | print(sum) 27 | 28 | Bot.debug_print() 29 | to_sum=(0+target-1)*target/2 30 | assert sum == to_sum 31 | 32 | 33 | def test_speed(): 34 | limited_speed=20 35 | 36 | import datetime 37 | start=datetime.datetime.now() 38 | count=0 39 | speed_record=[] 40 | def speed_rate(i): 41 | nonlocal count,speed_record,start 42 | count+=1 43 | if count>=limited_speed: 44 | end=datetime.datetime.now() 45 | s=(end-start).total_seconds() 46 | speed_record.append(count/s) 47 | count=0 48 | start=datetime.datetime.now() 49 | 50 | 51 | Bot.reset() 52 | Pipe( 53 | range(limited_speed*11), 54 | SpeedLimit(limited_speed), 55 | speed_rate 56 | 57 | ) 58 | 59 | Bot.run() 60 | ok_count=0 61 | for s in speed_record: 62 | up=limited_speed*1.1 63 | low=limited_speed *0.9 64 | if s>low and s=7 67 | 68 | # assert s < up 69 | # assert s > low -------------------------------------------------------------------------------- /tests/test_node.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kkyon/botflow/3c06afb8c9c8e0ac962bd2d157a79efcf05d913a/tests/test_node.py -------------------------------------------------------------------------------- /tests/test_python_lang.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | import typing 3 | import asyncio 4 | 5 | 6 | 7 | class TestPython(TestCase): 8 | 9 | 10 | def test_callable(self): 11 | class A: 12 | pass 13 | 14 | class B: 15 | def __call__(self, *args, **kwargs): 16 | pass 17 | 18 | def f(): 19 | pass 20 | async def af(): 21 | await asyncio.sleep() 22 | self.assertTrue(isinstance(f,typing.Callable)) 23 | self.assertTrue(isinstance(lambda r:r, typing.Callable)) 24 | self.assertTrue(isinstance(af, typing.Callable)) 25 | self.assertFalse(isinstance([], typing.Callable)) 26 | self.assertFalse(isinstance('', typing.Callable)) 27 | self.assertFalse(isinstance(1, typing.Callable)) 28 | 
self.assertTrue(isinstance(A, typing.Callable)) 29 | self.assertTrue(isinstance(B, typing.Callable)) 30 | self.assertFalse(isinstance(A(), typing.Callable)) 31 | self.assertTrue(isinstance(B(), typing.Callable)) 32 | 33 | 34 | def test_iterable(self): 35 | def f(): 36 | yield 1 37 | self.assertTrue(isinstance([],typing.Iterable)) 38 | self.assertTrue(isinstance((), typing.Iterable)) 39 | self.assertTrue(isinstance((), typing.Tuple)) 40 | self.assertTrue(isinstance([], typing.List)) 41 | self.assertTrue(isinstance(f(), typing.Iterable)) 42 | self.assertTrue(isinstance(f(), typing.Generator)) -------------------------------------------------------------------------------- /tests/test_queue.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from botflow.queue import DataQueue ,ConditionalQueue 3 | from botflow.bdata import Bdata 4 | 5 | import logging 6 | logger=logging.getLogger("botflow.queue") 7 | logger.setLevel(logging.DEBUG) 8 | sum=0 9 | 10 | 11 | import asyncio 12 | import aiomysql 13 | 14 | loop = asyncio.get_event_loop() 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | async def fa(): 23 | ori1=Bdata.make_Bdata_zori(1) 24 | ori2 = Bdata.make_Bdata_zori(2) 25 | q = ConditionalQueue() 26 | for i in range(100): 27 | await q.put(Bdata(4, ori2)) 28 | await q.put(Bdata(3,ori1)) 29 | 30 | 31 | o1= asyncio.ensure_future(q.get_by(ori1)) 32 | 33 | result=await asyncio.gather(o1) 34 | assert result[0].data==3 35 | 36 | 37 | 38 | 39 | def test_get_by(): 40 | loop.run_until_complete(fa()) 41 | -------------------------------------------------------------------------------- /tests/test_route.py: -------------------------------------------------------------------------------- 1 | 2 | from botflow import Pipe, Branch,Join,Filter,Timer 3 | from botflow import BotFlow 4 | 5 | class A: 6 | pass 7 | class B: 8 | pass 9 | 10 | class C: 11 | pass 12 | 13 | 14 | class Counter: 15 | def __init__(self,name='counter'): 16 | self.count=0 
17 | self.name=name 18 | def __call__(self, data): 19 | self.count=self.count+1 20 | return data 21 | 22 | 23 | a_count=0 24 | b_count = 0 25 | c_count = 0 26 | def only_a(i): 27 | 28 | assert(isinstance(i, A)) 29 | return i 30 | 31 | def only_b(i): 32 | 33 | assert(isinstance(i, B)) 34 | return i 35 | 36 | def only_c(i): 37 | 38 | assert(isinstance(i, C)) 39 | return i 40 | 41 | 42 | def a_to_b(i): 43 | return B() 44 | 45 | # 46 | def test_routetype(): 47 | BotFlow.reset() 48 | Pipe( 49 | [A(),B(),A()], 50 | Branch(only_a,route_type=A) 51 | ) 52 | BotFlow.run() 53 | # 54 | def test_routetype_no_shared(): 55 | BotFlow.reset() 56 | Pipe( 57 | [A(),B(),A()], 58 | Branch(only_a,route_type=A,share=False), 59 | only_b 60 | ) 61 | BotFlow.run() 62 | # 63 | def test_routetype_count(): 64 | BotFlow.reset() 65 | b_counter=Counter() 66 | a_counter=Counter() 67 | Pipe( 68 | [A(),B(),A()], 69 | Branch(only_a,a_counter,a_to_b,route_type=A,share=False,join=True), 70 | only_b, 71 | b_counter 72 | ) 73 | BotFlow.run() 74 | assert(b_counter.count==3) 75 | assert(a_counter.count == 2) 76 | 77 | # 78 | # 79 | def test_routetype_count2(): 80 | BotFlow.reset() 81 | b_counter=Counter() 82 | b1_counter = Counter() 83 | counter=Counter('count2') 84 | counter1 = Counter('count1') 85 | p=Pipe( 86 | [A(),B(),A()], 87 | Branch(only_b,counter1,route_type=B, join=True,share=True), 88 | counter, 89 | Branch(only_a,a_to_b,only_b,b1_counter,route_type=A,share=False,join=True), 90 | only_b, 91 | b_counter 92 | ) 93 | 94 | BotFlow.run() 95 | assert (counter1.count == 1) 96 | assert (counter.count == 4) 97 | 98 | assert (b1_counter.count == 2) 99 | assert (b_counter.count == 4) 100 | 101 | 102 | def test_routetype_count3(): 103 | BotFlow.reset() 104 | a_counter=Counter() 105 | b_counter=Counter() 106 | c_counter=Counter() 107 | 108 | p = Pipe( 109 | [A(), B(), A(),C(),C()], 110 | 111 | Branch(lambda i:isinstance(i,(A,C)), 112 | route_type=[A,C]), 113 | 114 | Branch( 115 | Branch( 
only_c,c_counter,route_type=C), 116 | 117 | share=False, 118 | route_type=[A, C]), 119 | 120 | ) 121 | 122 | BotFlow.run() 123 | assert (c_counter.count == 2) 124 | 125 | # def test_fork(): 126 | # BotFlow.reset() 127 | # a_count=0 128 | # b_count = 0 129 | # p=Pipe( 130 | # [A(),A()], 131 | # Fork(a_to_b,a_to_b,share=False,join=True), 132 | # only_b 133 | # 134 | # ) 135 | # 136 | # BotFlow.run() 137 | 138 | def test_double_loop(): 139 | BotFlow.reset() 140 | count=0 141 | def sum(x): 142 | nonlocal count 143 | count+=x 144 | 145 | 146 | p = Pipe( 147 | range(10), 148 | range(10), 149 | sum 150 | 151 | 152 | ) 153 | 154 | BotFlow.run() 155 | assert count==45*10 156 | 157 | 158 | 159 | 160 | def test_filter(): 161 | BotFlow.reset() 162 | Pipe( 163 | [A(),B(),C()], 164 | Filter(lambda x:isinstance(x,A)), 165 | only_a 166 | 167 | ) 168 | BotFlow.run() 169 | # 170 | def test_filter2(): 171 | 172 | BotFlow.reset() 173 | Pipe( 174 | [A(),B(),C()], 175 | Filter(filter_func=lambda r:isinstance(r,A)), 176 | only_a 177 | 178 | ) 179 | BotFlow.run() 180 | 181 | 182 | 183 | -------------------------------------------------------------------------------- /tests/test_stop.py.bak: -------------------------------------------------------------------------------- 1 | 2 | from botflow import Pipe, Branch,Join,Filter,Timer 3 | from botflow import BotFlow 4 | from botflow import Delay 5 | class A: 6 | pass 7 | class B: 8 | pass 9 | 10 | class C: 11 | pass 12 | 13 | 14 | def test_stop(): 15 | def check_stop(i): 16 | if i>10: 17 | BotFlow.stop() 18 | 19 | return i 20 | BotFlow.reset() 21 | Pipe( 22 | range(10000), 23 | Delay(), 24 | check_stop, 25 | print, 26 | 27 | 28 | ) 29 | BotFlow.run() 30 | assert True -------------------------------------------------------------------------------- /tests/type_hint.py: -------------------------------------------------------------------------------- 1 | 2 | import typing 3 | 4 | class func(object): 5 | 6 | def __call__(self, s:str): 7 | return s 8 
| def fun(s): 9 | return s 10 | def only_str(s : str): 11 | return s 12 | 13 | print(typing.get_type_hints(only_str)) 14 | s=typing.get_type_hints(only_str) 15 | print(len(s)) 16 | print(s.values()) 17 | 18 | print('callable object') 19 | s=typing.get_type_hints(func.__call__) 20 | print(len(s)) 21 | print(s.values()) 22 | 23 | 24 | print(typing.get_type_hints(fun)) 25 | 26 | 27 | 28 | from botflow import Pipe,BotFlow 29 | 30 | 31 | class A: 32 | pass 33 | 34 | class B: 35 | pass 36 | 37 | 38 | class C: 39 | pass 40 | 41 | def only_a(data: A): 42 | 43 | assert isinstance(data,A) 44 | print("i got A") 45 | 46 | 47 | Pipe( 48 | [A(),B(),C()], 49 | only_a, 50 | 51 | print 52 | 53 | ) 54 | 55 | BotFlow.run() --------------------------------------------------------------------------------