├── .gitignore ├── LICENSE.md ├── OpenStack └── oslo_ │ ├── README.md │ ├── config │ ├── config.conf │ ├── config_parser.py │ └── config_test.py │ └── i18n │ └── i18n_app │ ├── __init__.py │ ├── _i18n.py │ ├── locale │ ├── zh_CN │ │ └── LC_MESSAGES │ │ │ └── i18n_app.po │ └── zh_TW │ │ └── LC_MESSAGES │ │ └── i18n_app.po │ └── main.py ├── README.md ├── _config.yml ├── algorithms ├── algorithm_sorting.py └── question1.py ├── basic_grammar.md ├── class1_preliminary.md ├── class2_annotation.md ├── class3_inherit.md ├── class4_thorough.md ├── contributed_modules ├── mongodb │ └── mongodb_utils.py ├── mysql │ ├── mysqldb_ │ │ ├── __init__.py │ │ ├── mysql_lock.py │ │ └── study_mysqldb.py │ └── sqlalchemy_ │ │ ├── __init__.py │ │ ├── mysql_lock.py │ │ └── study_sqlalchemy.py ├── redis │ ├── README.md │ ├── redis_helper.py │ └── redis_test.py └── requests │ ├── README.md │ ├── __init__.py │ ├── restful.py │ ├── test.py │ └── utils.py ├── crawlers └── spider │ ├── __init__.py │ ├── downloader.py │ ├── main.py │ ├── parser.py │ ├── proxypools.py │ ├── tools.py │ ├── urlsmanager.py │ └── writer.py ├── data_analysis ├── __init__.py ├── academic_concept │ └── matrix_product.md ├── study_matplotlib │ ├── __init__.py │ ├── graphs │ │ ├── __init__.py │ │ ├── graphs_histogram.py │ │ ├── graphs_quadratic.py │ │ └── graphs_trigonometric.py │ ├── matplotlib_2d.py │ ├── png │ │ └── numpy.png │ ├── save_file │ │ ├── graphs_histogram.png │ │ ├── graphs_quadratic.png │ │ └── graphs_trigonometric.png │ └── test.py ├── study_mlab │ ├── __init__.py │ └── mlab_3d.py ├── study_numpy │ ├── __init__.py │ ├── _test.py │ ├── numpy_functions │ │ ├── np_arange.py │ │ ├── np_dot.py │ │ ├── np_mgrid_ogrid.py │ │ └── np_random.py │ ├── numpy_multidimensional.py │ ├── numpy_ndarray.py │ ├── numpy_polynomial_poly1d.py │ ├── numpy_ufunc.py │ └── png │ │ └── numpy.png └── study_tesseract │ ├── __init__.py │ ├── image │ └── 20170807142300.png │ └── test01.py ├── data_structure.md ├── decorator.md ├── 
dict.md ├── file.md ├── levenshtein.py ├── list.md ├── loop.md ├── page_parser ├── __init__.py ├── beautifulsoup │ ├── __init__.py │ ├── parser.py │ ├── test.py │ └── test_403.py └── xpath │ ├── __init__.py │ ├── file.txt │ └── test.py ├── rpc └── RPyC │ ├── demo.py │ └── tutorials │ ├── part01.py │ └── services │ └── registry_discovery │ ├── __init__.py │ ├── client_test.py │ └── service01.py ├── scheduler_task └── study_apscheduler │ ├── __init__.py │ ├── examples │ ├── demo.py │ ├── executors │ │ ├── __init__.py │ │ ├── configure.py │ │ ├── process_pool.py │ │ └── simple.py │ ├── jobstores │ │ ├── __init__.py │ │ ├── job_store.py │ │ └── log.py │ └── schedules │ │ ├── __init__.py │ │ └── schdule.py │ └── tutorials │ └── __init__.py ├── set.md ├── skills ├── README.md ├── async_call.py ├── download_music.py └── httpserver.py ├── standard_library ├── __init__.py ├── email │ ├── message_html.py │ ├── message_text.py │ ├── shell_mime.py │ └── textfile ├── process │ ├── __init__.py │ ├── process_pool.py │ └── simple_core.py ├── study_argparse.py ├── study_color_print.py ├── study_file.py ├── study_filter.py ├── study_httplib.py ├── study_itertools.py ├── study_logging.py ├── study_os.py ├── study_regular_expression.py ├── study_socket.py └── threads │ ├── __init__.py │ ├── demo_consumer_producer.py │ ├── my_thread_pool.py │ ├── rethread.py │ ├── thread_pool_test.py │ └── thread_pool_test2.py └── use_package.md /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | ### Python template 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | env/ 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | 30 | # 
PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *,cover 49 | .hypothesis/ 50 | 51 | # Translations 52 | *.mo 53 | *.pot 54 | 55 | # Django stuff: 56 | *.log 57 | local_settings.py 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # dotenv 85 | .env 86 | 87 | # virtualenv 88 | .venv 89 | venv/ 90 | ENV/ 91 | 92 | # Spyder project settings 93 | .spyderproject 94 | 95 | # Rope project settings 96 | .ropeproject 97 | 98 | .idea/ 99 | *.*~ 100 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 tom.lee 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the 
Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /OpenStack/oslo_/README.md: -------------------------------------------------------------------------------- 1 | # [oslo公共库](https://docs.openstack.org/oslo.config/latest/) 2 | > OpenStack开源公共库 3 | -------------------------------------------------------------------------------- /OpenStack/oslo_/config/config.conf: -------------------------------------------------------------------------------- 1 | [DEFAULT] 2 | # DEFAULT 不可省略,必须大写 3 | enabled_api = ec2, api_keystone, api_compute 4 | bind_host = 196.168.1.111 5 | bind_port = 9999 6 | 7 | [RABBIT] 8 | host = 127.0.0.1 9 | port = 12345 10 | ssl = true 11 | username = guest 12 | password = guest -------------------------------------------------------------------------------- /OpenStack/oslo_/config/config_parser.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Time : 17-8-31 下午1:29 4 | # @Author : Tom.Lee 5 | # @File : config_parser.py 6 | # @Product : PyCharm 7 | # @Docs : 8 | # @Source : 9 | 10 | from oslo_config import cfg 11 | from oslo_config import types 12 | 13 | 14 | class ConfigManager(object): 15 | PortType = types.Integer(1, 65535) 16 | default_opts = [ 17 | cfg.StrOpt( 18 | 'bind_host', 19 | default='0.0.0.0', 20 | help='IP address to listen on.'), 21 | cfg.Opt( 22 | 'bind_port', # 只有Opt类型才能指定PortType 23 | type=PortType, 24 | 
default=9292, 25 | help='Port number to listen on.') 26 | ] 27 | default_opt = cfg.ListOpt( 28 | 'enabled_api', 29 | default=['ec2', 'api_compute'], 30 | help='List of APIs to enable by default.') 31 | cli_opts = [ 32 | cfg.BoolOpt('verbose', 33 | short='v', 34 | default=False, 35 | help='Print more verbose output'), 36 | cfg.BoolOpt('debug', 37 | short='d', 38 | default=False, 39 | help='Print debugging output'), 40 | ] 41 | rabbit_group = cfg.OptGroup( 42 | name='RABBIT', 43 | title='RABBIT options' 44 | ) 45 | rabbit_opt = cfg.BoolOpt( 46 | 'ssl', 47 | default=False, 48 | help='use ssl for connection') 49 | rabbit_opts = [ 50 | cfg.StrOpt( 51 | 'host', 52 | default='localhost', 53 | help='IP/hostname to listen on.'), 54 | cfg.IntOpt( 55 | 'port', 56 | default=5672, 57 | help='Port number to listen on.') 58 | ] 59 | 60 | def __init__(self): 61 | self.conf = cfg.CONF 62 | self._register_opts() 63 | 64 | def _register_opts(self): 65 | # default 66 | self.conf.register_opt(self.default_opt) 67 | self.conf.register_opts(self.default_opts) 68 | # rabbit 69 | self.conf.register_group(self.rabbit_group) 70 | self.conf.register_opts(self.rabbit_opts, self.rabbit_group) 71 | self.conf.register_opt(self.rabbit_opt, self.rabbit_group) 72 | # cli 73 | self.conf.register_cli_opts(self.cli_opts) 74 | self.conf(default_config_files=['config.conf']) 75 | 76 | @property 77 | def bind_port(self): 78 | return getattr(self.conf, 'bind_port', None) 79 | 80 | @property 81 | def bind_host(self): 82 | return getattr(self.conf, 'bind_host', None) 83 | 84 | 85 | config_manager = ConfigManager() 86 | if __name__ == '__main__': 87 | print config_manager.bind_port 88 | print config_manager.bind_host 89 | -------------------------------------------------------------------------------- /OpenStack/oslo_/config/config_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Time : 17-8-31 上午10:40 4 | # 
@Author : Tom.Lee 5 | # @File : config.py 6 | # @Product : PyCharm 7 | # @Docs : 8 | # @Source : https://github.com/openstack/oslo.config/blob/master/oslo_config/cfg.py 9 | 10 | """ 11 | 配置文件中的选项(group, opts),必须在代码中显示的注册,否则无法解析 12 | """ 13 | 14 | from oslo_config import cfg 15 | from oslo_config import types 16 | 17 | # 端口规范 18 | PortType = types.Integer(1, 65535) 19 | 20 | # 多个配置项组成一个模式 21 | default_opts = [ 22 | cfg.StrOpt('bind_host', 23 | default='0.0.0.0', 24 | help='IP address to listen on.'), 25 | cfg.Opt('bind_port', # 只有Opt类型才能指定PortType 26 | type=PortType, 27 | default=9292, 28 | help='Port number to listen on.') 29 | ] 30 | 31 | # 单个配置项模式 32 | default_opt = cfg.ListOpt('enabled_api', 33 | default=['ec2', 'api_compute'], 34 | help='List of APIs to enable by default.') 35 | 36 | # 命令行选项 37 | cli_opts = [ 38 | cfg.BoolOpt('verbose', 39 | short='v', 40 | default=False, 41 | help='Print more verbose output'), 42 | cfg.BoolOpt('debug', 43 | short='d', 44 | default=False, 45 | help='Print debugging output'), 46 | ] 47 | 48 | # 配置 rabbit_group 组 49 | rabbit_group = cfg.OptGroup( 50 | name='RABBIT', 51 | title='RABBIT options' 52 | ) 53 | # 配置组中的模式,通常以配置组的名称为前缀(非必须) 54 | rabbit_opt = cfg.BoolOpt('ssl', 55 | default=False, 56 | help='use ssl for connection') 57 | # 配置组中的多配置项模式 58 | rabbit_opts = [ 59 | cfg.StrOpt('host', 60 | default='localhost', 61 | help='IP/hostname to listen on.'), 62 | cfg.IntOpt('port', 63 | default=5672, 64 | help='Port number to listen on.') 65 | ] 66 | 67 | 68 | def register_default_opts(conf): 69 | """ 70 | 注册默认组的配置项 71 | """ 72 | conf.register_opt(default_opt) 73 | conf.register_opts(default_opts) 74 | 75 | 76 | def register_rabbit_group(conf): 77 | """ 78 | 注册 rabbit 信息 79 | """ 80 | # 配置组必须在其组件被注册前注册! 
81 | conf.register_group(rabbit_group) 82 | # 注册配置组中含有多个配置项的模式,必须指明配置组 83 | conf.register_opts(rabbit_opts, rabbit_group) 84 | # 注册配置组中的单配置项模式,指明配置组 85 | conf.register_opt(rabbit_opt, rabbit_group) 86 | 87 | 88 | def register_cli_opts(conf): 89 | """ 90 | 注册 cli 选项 91 | :param conf: 92 | :return: 93 | """ 94 | conf.register_cli_opts(cli_opts) 95 | 96 | 97 | def get_bind_host(conf): 98 | """ 99 | 使用选项 bind_host 100 | """ 101 | return getattr(conf, 'bind_host', None) 102 | 103 | 104 | def get_bind_port(conf): 105 | """ 106 | 使用选项 bind_port 107 | """ 108 | return conf.bind_port 109 | 110 | 111 | def get_rabbit_username(conf): 112 | """ 113 | 配置文件中存在,代码没有注册,不能解析 114 | """ 115 | return conf.RABBIT.username 116 | 117 | 118 | if __name__ == '__main__': 119 | # 创建配置类 120 | config = cfg.CONF 121 | # 开始注册default 122 | register_default_opts(config) 123 | register_rabbit_group(config) 124 | register_cli_opts(config) 125 | 126 | # 加载配置文件 127 | config(default_config_files=['config.conf']) 128 | print 'host:', get_bind_host(config) 129 | # list_all_sections 130 | for section in config.list_all_sections(): 131 | print section 132 | 133 | print config.RABBIT 134 | print config.RABBIT.host 135 | print get_rabbit_username(config) 136 | -------------------------------------------------------------------------------- /OpenStack/oslo_/i18n/i18n_app/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Time : 17-9-16 下午2:52 4 | # @Author : Tom.Lee 5 | # @CopyRight : 2016-2017 OpenBridge by yihecloud 6 | # @File : __init__.py.py 7 | # @Product : PyCharm 8 | # @Docs : 9 | # @Source : 10 | 11 | 12 | -------------------------------------------------------------------------------- /OpenStack/oslo_/i18n/i18n_app/_i18n.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Time : 17-9-16 下午3:10 4 | # 
@Author : Tom.Lee 5 | # @CopyRight : 2016-2017 OpenBridge by yihecloud 6 | # @File : _i18n.py.py 7 | # @Product : PyCharm 8 | # @Docs : https://docs.openstack.org/oslo.i18n/latest/user/usage.html 9 | # @Source : 10 | 11 | import oslo_i18n 12 | 13 | DOMAIN = "i18n_app" 14 | 15 | _translators = oslo_i18n.TranslatorFactory(domain=DOMAIN) 16 | 17 | # The primary translation function using the well-known name "_" 18 | _ = _translators.primary 19 | 20 | # The contextual translation function using the name "_C" 21 | # requires oslo.i18n >=2.1.0 22 | _C = _translators.contextual_form 23 | 24 | # The plural translation function using the name "_P" 25 | # requires oslo.i18n >=2.1.0 26 | _P = _translators.plural_form 27 | 28 | # Translators for log levels. 29 | # 30 | # NOTE(dhellmann): This is not needed for new projects as of the 31 | # Pike series. 32 | # 33 | # The abbreviated names are meant to reflect the usual use of a short 34 | # name like '_'. The "L" is for "log" and the other letter comes from 35 | # the level. 
36 | _LI = _translators.log_info 37 | _LW = _translators.log_warning 38 | _LE = _translators.log_error 39 | _LC = _translators.log_critical 40 | 41 | 42 | def get_available_languages(): 43 | """ 44 | 返回当前可以提供翻译的语言列表 45 | 46 | #所有的语言包在 /usr/local/lib/python2.7/dist-packages/babel/locale-data/ 47 | :return: 48 | """ 49 | return oslo_i18n.get_available_languages(DOMAIN) 50 | 51 | 52 | def translate(msg, user_locale='zh_CN'): 53 | """ 54 | 翻译"msg"为指定的语言,默认"zh_CN" 55 | 56 | :param msg: the object to translate 57 | :param user_locale: the locale to translate the message to, if None the 58 | default system locale will be used 59 | 'en_US' 'zh_CN' 60 | :returns: the translated object in unicode, or the original object if 61 | it could not be translated 62 | """ 63 | return oslo_i18n.translate(msg, user_locale) 64 | 65 | 66 | def enable_lazy(enable=True): 67 | return oslo_i18n.enable_lazy(enable) 68 | -------------------------------------------------------------------------------- /OpenStack/oslo_/i18n/i18n_app/main.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Time : 17-9-16 下午2:53 4 | # @Author : Tom.Lee 5 | # @CopyRight : 2016-2017 OpenBridge by yihecloud 6 | # @File : main.py 7 | # @Product : PyCharm 8 | # @Docs : https://docs.openstack.org/oslo.i18n/latest/user/usage.html 9 | # @Source : 10 | 11 | from _i18n import get_available_languages 12 | 13 | languages = get_available_languages() 14 | 15 | print languages 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Python Study 2 | > 控制台打印乱码: **` print '你好,世界!'.decode('utf-8') `** 3 | 4 | ```python 5 | url = 'http://{0}:{1}/{2}'.format('0.0.0.0', 2375, 'xxx') 6 | url = 'http://{ip}:{port}/{uri}'.format(ip='0.0.0.0', port=2375, uri='xxx') 7 | url = 'http://%s:%d/%s' % ('0.0.0.0',
2375, 'xxx') 8 | ``` 9 | 10 | ## Windows Python 依赖库[ **PythonLibs**](http://www.lfd.uci.edu/~gohlke/pythonlibs/) 11 | * 1.找到对应的 `whl` 包下载 12 | * 2.直接`pip install *.whl` 或者修改`.whl`文件为`.zip`文件,解压缩文件的`Python文件夹`复制到--`python`安装目录下的`Lib`--目录下 13 | 14 | ## [Python 中文翻译文档集合](http://python.usyiyi.cn/) 15 | ## [Python 官方文档](https://docs.python.org/2.7/) 16 | ## [Top Python APIs](https://www.programcreek.com/python/index/module/list) 17 | 18 | ## Python2.7环境变量 19 | > 假如`sys.path`不对,则使用Python终端 ` sys.path = [...] `重新设置即可. 20 | > 默认环境配置如下: 21 | 22 | ```shell 23 | root@node-40:~# python 24 | Python 2.7.6 (default, Jun 22 2015, 17:58:13) 25 | [GCC 4.8.2] on linux2 26 | Type "help", "copyright", "credits" or "license" for more information. 27 | >>> import sys 28 | >>> sys.path 29 | ['', '/usr/lib/python2.7', '/usr/lib/python2.7/plat-x86_64-linux-gnu', '/usr/lib/python2.7/lib-tk', '/usr/lib/python2.7/lib-old', '/usr/lib/python2.7/lib-dynload', '/usr/local/lib/python2.7/dist-packages', '/usr/lib/python2.7/dist-packages', '/usr/lib/python2.7/dist-packages/PILcompat', '/usr/lib/python2.7/dist-packages/gtk-2.0', '/usr/lib/pymodules/python2.7'] 30 | >>> 31 | ``` 32 | ```shell 33 | # /etc/profile 34 | 35 | export PYTHONPATH=/usr/lib/python2.7:/usr/lib/python2.7/plat-x86_64-linux-gnu:/usr/lib/python2.7/lib-tk:/usr/lib/python2.7/lib-old:/usr/lib/python2.7/lib-dynload:/usr/local/lib/python2.7/dist-packages:/usr/lib/python2.7/dist-packages:/usr/lib/python2.7/dist-packages/PILcompat:/usr/lib/python2.7/dist-packages/gtk-2.0:/usr/lib/pymodules/python2.7 36 | export PATH=$PATH:$PYTHONPATH 37 | ``` 38 | 39 | ## Windows环境`Python2.7`与`Python3.x` 共同使用 40 | 41 | * Python2.7 : `$ py -2` 42 | * Python3.x : `$ py -3` 43 | * Python2.7 pip : `$ py -2 -m pip xxx` 44 | * Python3.x pip : `$ pip3 xxx` 45 | 46 | ## pycharm 47 | > settings 48 | 49 | * enable Code compatibility inspection: `settings` --> `code compatibility inspection` 50 | 51 | ## Python内置工具 52 | 53 | * 下载服务器: 54 | * Python2.x 55 | * `$ python 
-m SimpleHTTPServer` 默认端口8000 56 | * `$ py -2 -m SimpleHTTPServer` 默认端口8000 57 | * `$ py -2 -m SimpleHTTPServer 9090` 指定端口9090 58 | * 使用代码: 59 | ```python 60 | import SimpleHTTPServer 61 | 62 | SimpleHTTPServer.test() 63 | ``` 64 | * Python3.x 65 | * `$ python -m http.server` 66 | * `$ py -3 -m http.server` 67 | 68 | * Json格式化:`$ curl http://localhost:8080/get | python -m json.tool` 69 | 70 | * 执行Python代码:`$ python -c "print 'hello world!'"` 71 | 72 | * 解压zip包: 73 | * 创建zip包:`$ python -m zipfile -c tom.zip tom.txt` 74 | * 解压zip包:`$ python -m zipfile -e tom.zip .` 75 | * 查看zip包:`$ python -m zipfile -l tom.zip` 76 | 77 | 78 | * 文件处理: 79 | ```python 80 | import shutil 81 | 82 | shutil.copy('C:\Users\Administrator\Desktop\ctools2.rar','q.rar') 83 | ``` 84 | 85 | 86 | ## 关于Python工作中的一些总结性技术 87 | 88 | * [爬虫](https://github.com/tomoncle/PythonStudy/tree/master/crawlers/) 89 | * [RPC](https://github.com/tomoncle/PythonStudy/tree/master/rpc/) 90 | * [定时任务](https://github.com/tomoncle/PythonStudy/tree/master/scheduler_task/study_apscheduler/) 91 | * [mysql](https://github.com/tomoncle/PythonStudy/tree/master/contributed_modules/mysql/) 92 | * [mongodb](https://github.com/tomoncle/PythonStudy/tree/master/contributed_modules/mongodb/) 93 | * [redis](https://github.com/tomoncle/PythonStudy/tree/master/contributed_modules/redis/) 94 | * [数据分析](https://github.com/tomoncle/PythonStudy/tree/master/data_analysis/):`matplotlib`, `mlab` , `numpy`, `tesseract` 95 | * [页面解析技术](https://github.com/tomoncle/PythonStudy/tree/master/page_parser/): `bs4`, `xpath` 96 | * [openstack开源模块](https://github.com/tomoncle/PythonStudy/tree/master/OpenStack/oslo_/) 97 | * [Python 装饰器](https://github.com/tomoncle/PythonStudy/tree/master/decorator.md) 98 | * [Python 多线程/多进程](https://github.com/tomoncle/PythonStudy/tree/master/standard_library/threads/) 99 | * [Python 内置模块](https://github.com/tomoncle/PythonStudy/tree/master/standard_library/) 100 | * [Python
使用技巧](https://github.com/tomoncle/PythonStudy/tree/master/skills) 101 | 102 | -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-midnight -------------------------------------------------------------------------------- /algorithms/algorithm_sorting.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Time : 17-4-19 上午11:14 4 | # @Author : Tom.Lee 5 | # @Description : 6 | # @File : algorithm_sorting.py 7 | # @Product : PyCharm 8 | 9 | 10 | def bubble_sort(): 11 | """ 12 | 冒泡排序: 13 | n个元素,循环n-1轮, 14 | 每一轮,比较n-i次,选出最大值 15 | """ 16 | L = [9, 8, 7, 6, 5, 4, 3, 2, 1] 17 | n = len(L) 18 | for i in range(1, n): # 比较n-1轮 19 | # print n - i 20 | for j in range(n - i): # 每i轮比较n-i次,选出最大值 21 | if L[j] > L[j + 1]: 22 | L[j], L[j + 1] = L[j + 1], L[j] 23 | 24 | print L 25 | 26 | 27 | def insertion_sort(): 28 | """ 29 | 插入排序算法: 30 | 原序列:[2,3,1,34,5,6,11,7,8] 31 | 32 | 下标从0开始: 33 | 第一次:取下标1 和下标[:1]比较 34 | 第二次:取下标2 和下标[:2]比较 35 | 。。。 36 | 第n-1次:取下标n-1(注意此时的元素已经是最后一个元素了)和[:n-1]比较 37 | 共比较n-1次 38 | """ 39 | 40 | L = [9, 8, 7, 5, 6, 4, 3, 2, 1] 41 | n = len(L) 42 | for i in range(n - 1): 43 | for j in range(i + 1): # 因为下标从0开始,所以第i次,对应的数据位置要 i+1表示当前下标位置 44 | # print i+1,'-',j 45 | if L[i + 1] < L[j]: L[i + 1], L[j] = L[j], L[i + 1] 46 | 47 | print L 48 | 49 | 50 | def selection_sort(): 51 | """ 52 | 选择排序算法: 53 | 54 | 每次找出最小元素,放置到序列头部,循环序列 55 | 56 | 第一次:找出最小放到下标0 57 | 第二次:在剩余找出最小放到下标1 58 | 。。。 59 | 第n-1次 60 | """ 61 | L = [5, 4, 3, 2, 1, 0, -77] 62 | n = len(L) 63 | for i in range(n - 1): 64 | for j in range(i + 1, n): 65 | # print i,'-',j 66 | if L[i] > L[j]: L[i], L[j] = L[j], L[i] 67 | 68 | print L 69 | -------------------------------------------------------------------------------- /algorithms/question1.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Time : 2017/5/16 21:56 4 | # @Author : tom.lee 5 | # @Site : 6 | # @File : question1.py 7 | # @Software: 8 | 9 | """ 10 | 输入一个数组a,和一个整数k,计算出这个数组随机组成的数字,大于或等于的值中最小的一个 11 | 12 | "这个算法有问题.有时间再改吧." 13 | """ 14 | 15 | a = [1, 3, 4, 5] 16 | k = 1222 17 | 18 | a.sort() 19 | kps = False 20 | ks = list(str(k)) 21 | length = len(ks) 22 | 23 | 24 | def _min(lis, v): 25 | for n in lis: 26 | if n < v: 27 | continue 28 | else: 29 | return n 30 | return None 31 | 32 | 33 | def deep(start, length, kps): 34 | for i in range(start, length): 35 | if kps: 36 | ks[i] = str(a[0]) 37 | if int(ks[i]) in a: 38 | continue 39 | else: 40 | m = _min(a, int(ks[i])) 41 | kps = True 42 | if not m: 43 | ks[i - 1] = str(a[a.index(int(ks[i - 1]) + 1)]) 44 | 45 | deep(i, length, kps) 46 | else: 47 | ks[i] = str(m) 48 | 49 | 50 | deep(0, length, kps) 51 | print ''.join(ks) 52 | -------------------------------------------------------------------------------- /basic_grammar.md: -------------------------------------------------------------------------------- 1 | # 基础语法 2 | python思想中一切皆对象,但是由于python是一门应用非常广泛的语言,所以在某些方面,你可以把python称为面向对象的语言,但它也可以是一门面向过程的语言: 3 | **程序 = 数据结构 + 算法** 4 | * 面向过程:以指令为中心,由指令处理数据,如何组织代码解决问题,如:shell运维 5 | * 面向对象:以数据为中心,所有的处理代码都围绕数据展开,如何设计数据结构组织数据, 6 | 并提供对此类数据所允许处理操作,如:web开发 7 | 8 | 9 | # 数据结构: 10 | * 1.Python最基本的是数据结构是序列, 序列为索引为非负整数的有序对象的集合 11 | * 2.Python中内建的6种数据序列:列表,元组,字符串,Unicode字符串,buffer对象和xrange对象 12 | * 3.None表示Python的空值,对于Python而言,不管 **"",None,[],{},set(),()** 在if判断语句中都为False 13 | 14 | ### 基本数据类型 15 | * Integral 类型: 16 | * 整形:不可变类型 17 | * 布尔类型:True,False 18 | 19 | * 浮点类型: 20 | * 浮点数:3.1415 21 | * 复数 :3+6j 22 | * 十进制数字: 23 | * 字符串:"abc” 24 | 25 | ### Python中组合数据类型: 26 | 注:该类型其实都是对对象的引用 27 | * 元组:tuple(),不可变对象 28 | * 列表:list[],可变对象 29 | * 字典:dict{},可变对象 30 | * 集合:set([]),可变对象 31 | 32 | ### 容器,类型,对象相关概念: 33 | list,tuple,dict都是容器 34 | * 
1.list,tuple,dict 可以跨行定义,最后一个元素后可以使用逗号,但元素为空时,不允许使用逗号 35 | * 2.所有对象都有引用计数,当引用计数为0时,垃圾回收器会回收此对象 36 | * 3.获取对象引用计数`import sys ; sys.getrefcount(object)` 37 | * 4.删除对象引用,del(object),减少对象引用计数 38 | * 5.列表和字典都支持两种复制操作:浅复制和深复制 39 | * 浅复制:如`list_1 = list(old_list)`或`copy.copy(old_list)`,创建一个新的容器对象,但其中的元素还是对之前对象元素的引用,修改其中的可变元素会影响之前对象(注意:直接赋值`list_1 = old_list`不是复制,只是给同一个对象增加一个引用) 40 | * 深复制:创建一个新对象,然后递归的复制一份,放置到新对象中,新对象不会影响之前对象 41 | 深复制可以使用copy模块的deepcopy()实现 42 | * 6.Python中的对象都是"第一类",即使用标识符命名的所有对象都有相同的状态,于是能够命名所有对象, 43 | 都可以直接当数据进行处理,比如:`a = 1,b = 2,result = b/a`表示数值对象直接可以当作数据处理 44 | * 7.所有序列(概念:序列为索引为非负整数的有序对象的集合)都支持迭代(字符串也是一个序列) 45 | * 8.所有序列都支持的操作和方法: 46 | * 索引 : `val[i]` 47 | * 切片 : `val[i:j]` 48 | * 扩展切片 : `val[i:j:stride]` 49 | * 长度 : `len(val)` 50 | * 最小值 : `min(val)` 51 | * 最大值 : `max(val)` 52 | * 数值序列求和 : `sum(val)` 53 | * 都为True : `all(val)` 54 | * 任意为True : `any(val)` 55 | * 连接 : `val+val2` 56 | * 重复 : `val * N` 57 | * 存在 : `item in container` 58 | * 不存在 : `item not in container` 59 | 60 | ### 对象引用(变量): 61 | * Python中所有的数据存为内存对象: 62 | * Python中,"变量"实际是"指向内存对象的引用" 63 | 64 | * 动态类型: 65 | * 在任何时刻,只要需要,某个对象引用都可以重新引用一个不同的对象,可以是不同的数据类型 66 | * 内置函数type()用于返回给定对象的数据类型 67 | * 内置函数id()查看变量引用对象在内存中的地址 68 | * "="用于将变量名与内存中的对象绑定,如果对象存在,就直接指向该对象,如果不存在,用"="创建引用的对象 69 | 70 | **例如**:`name="tom"`,这个操作Python会在内存中声明一个name变量,开辟一块内存空间存储`tom`对象,然后`name`指向 71 | `tom`这个字符串对象,当`tom`这个字符串的引用计数为0时,Python会在合适的机会进行垃圾回收 72 | 73 | # 命名规范: 74 | * 只能包含字母,数字,下划线,不能以数字开头 75 | * 区分字母大小写 76 | * 禁止使用关键字 77 | 78 | * 以单一下划线开头的命名的变量(\_name)不会被from module import * 语句导入 79 | * 前后有两个下划线命名的变量(\_\_str__)是系统定义的变量名,对Python解释器有特殊含义 80 | * 以两个下划线开头但结尾没有的变量(\_\_numbers)是类的本地变量 81 | * 在交互模式下,"\_" 用于保存最后表达式的结果 82 | 83 | **注意:变量名没有类型,对象才有** 84 | 85 | # 帮助文档: 86 | * 查看对象方法:`dir(obj)` 87 | * 查看对象函数使用:`help(obj.method)` 88 | 89 | 90 | 91 | -------------------------------------------------------------------------------- /class1_preliminary.md: -------------------------------------------------------------------------------- 1 | # python 的类初步 2 | 3 | # 基础语法 4 | ### 定义 5 | 关键字:class 6 | 格式: class ClassName(): pass 7 |
8 | ### 初始化 9 | 使用`__init__(self)`来进行初始化操作,`self`是python类中方法中必须存在的参数,表示当前对象, 10 | 使用该参数传递对象,类似java的this, 当然`self`只是一个别名,你可用随便指定任意名称, 11 | 如`abc`,但是不建议这样做,可读性差 12 | 13 | ```python 14 | # -*- coding=utf-8 -*- 15 | # 定义 16 | class Person(): 17 | def __init__(self): 18 | print '初始化 ...' 19 | 20 | # 引用 21 | p=Person() 22 | 23 | print p 24 | 25 | """结果: 26 | >>初始化 ... 27 | >><__main__.Person instance at 0x7f0ac74f1c68> 28 | """ 29 | ``` 30 | ### 初始化并且传递参数 31 | 使用`__init__(self,param)`来进行初始化操作 32 | 33 | ```python 34 | # -*- coding=utf-8 -*- 35 | # 定义 36 | class Person(): 37 | def __init__(self, params): 38 | self.params = params 39 | print '初始化 ...' 40 | 41 | 42 | # 引用 43 | p = Person("hello python") 44 | 45 | print p.params 46 | 47 | """结果: 48 | >>初始化 ... 49 | >>hello python 50 | """ 51 | ``` 52 | 53 | ### 传递任意参数 54 | ```python 55 | # -*- coding=utf-8 -*- 56 | # 定义 57 | class Person(): 58 | def __init__(self, *args,**kwargs): 59 | print '传入参数:',kwargs,args 60 | for k, v in kwargs.iteritems(): # 使用setattr()方法添加属性 61 | setattr(self, k, v) 62 | 63 | print '初始化 ...' 
64 | 65 | 66 | # 引用 67 | 68 | p = Person(**{'k':1,'kk':2}) 69 | 70 | print p.__dict__ # 打印对象属性字典 71 | 72 | ``` 73 | 74 | 75 | 76 | # 对象操作之动态属性 77 | python 实例对象可以动态地添加或删除属性 78 | ### 添加 79 | * `setattr(object,'field_name',value)` : 添加或修改object对象的属性field_name,值为value 80 | * `object.field_name = value` : 直接使用`对象.属性=值` 为对象添加或修改属性 81 | ### 获取 82 | * `getattr(object,'field_name',default_value)`:获取object对象的field_name属性,假如该属性不存在,返回default_value默认值 83 | * `object.field_name` : 直接使用`对象.属性` 获取对象的属性值,如果属性不存在,抛出异常 84 | ### 删除 85 | * `delattr(object,'field_name')` : 删除object对象的field_name属性,如果属性不存在,抛出异常 86 | * `del object.field_name` : 使用`del 对象.属性` 删除object对象的field_name属性,如果属性不存在,抛出异常 87 | ### 判断是否存在 88 | * `hasattr(object, 'field_name')`: 判断object对象是否存在field_name属性 89 | 90 | 91 | # 属性和方法 92 | 在类里"私有属性"使用双下划线(\_\_)开头,"私有方法"也是使用双下划线(\_\_)开头,"受保护属性和受保护方法"使用单下划线(\_)开头, 93 | "公开属性和方法"使用字母开头: 94 | ```python 95 | class A(object): 96 | def __init__(self): 97 | self.__name = "tom" # private 私有属性,不能被外部调用, (实际是可以用 ._{className}__{fieldName}来访问) 98 | self._id = 10 # protected 受保护的属性,可以被外部调用,但不建议 99 | self.age = 20 # public 公开的属性,可以完全被外部访问 100 | 101 | def __say_hi(self): 102 | """ 103 | 私有方法,不能被外部调用,(实际是可以用 ._{className}__{methodName}来访问) 104 | :return: 105 | """ 106 | print 'private method : say hi %s' % self.__name 107 | 108 | def _hello(self): 109 | """ 110 | 受保护的方法,可以被外部调用,但不建议 111 | :return: 112 | """ 113 | print 'protected method: hello %s' % self._id 114 | 115 | def hello_world(self): 116 | """ 117 | 公开的方法 118 | :return: 119 | """ 120 | print "public method : hello world %s" % self.age 121 | 122 | ``` 123 | -------------------------------------------------------------------------------- /class2_annotation.md: -------------------------------------------------------------------------------- 1 | # Python 类注解 2 | 3 | ```python 4 | 5 | class Person(object): 6 | # 类属性,所有实例只存在一份类属性,共享该属性.
7 | class_attr = None 8 | 9 | def __init__(self, id, name): 10 | """ 11 | 实例方法,第一个参数必须为 self 12 | :param id: 实例属性 13 | :param name: 实例属性 14 | """ 15 | self.id = id 16 | self.name = name 17 | self.__private_filed = 'private filed' # 使用"__"开头的属性,为私有属性,外部无法访问 18 | 19 | @classmethod 20 | def class_method(cls): 21 | """ 22 | 类方法, 使用 @classmethod 注解来标注,参数为 cls 23 | 可以通过cls.属性或方法名调用 24 | """ 25 | return ' this is class method' 26 | 27 | @staticmethod 28 | def static_method(): 29 | """ 30 | static方法, 使用@staticmethod 注解来标注 31 | 只能通过Person.属性名或方法名,一个全局函数 32 | """ 33 | return ' this is static method' 34 | 35 | def get_private_filed(self): 36 | """ 37 | 可以定义方法来提供外部访问该属性,而不知道内部结构 38 | :return:__private_filed 39 | """ 40 | return self.__private_filed 41 | 42 | ``` 43 | 44 | # Python类深入 45 | ### 访问限制 46 | python中访问限制,如果一个属性由双下划线开头(__),该属性就无法被外部访问. 47 | 但是,如果一个属性以"__xxx__"的形式定义,那它又可以被外部访问了, 48 | 以"__xxx__"定义的属性在Python的类中被称为特殊属性,有很多预定义的特殊属性可以使用, 49 | 通常我们不要把普通属性用"__xxx__"定义。 50 | 以单下划线开头的属性"_xxx"虽然也可以被外部访问,但是,按照习惯,他们不应该被外部访问。 51 | ### 属性 52 | 绑定在一个实例上的属性不会影响其他实例,但是,类本身也是一个对象,如果在类上绑定一个属性, 53 | 则所有实例都可以访问类的属性,并且,所有实例访问的类属性都是同一个! 
54 | 也就是说,实例属性每个实例各自拥有,互相独立,而类属性有且只有一份。 55 | 当实例属性和类属性重名时,实例属性优先级高,它将屏蔽掉对类属性的访问。但不会影响其他实例对象 56 | 当类属性变为(__)私有时,外部依然无法访问 57 | ### 函数方法 58 | 我们在 class 中定义的实例方法其实也是属性,它实际上是一个函数对象: 59 | 举例:p1.get_grade 返回的是一个函数对象但这个函数是一个绑定到实例的函数, 60 |    p1.get_grade() 才是方法调用 61 | 因为方法也是一个属性,所以,它也可以动态地添加到实例上,只是需要用 types.MethodType() 62 | 把一个函数变为一个方法: 63 | 代码: 64 | import types 65 | def fn_get_grade(self): 66 | if self.score >= 80: 67 | return 'A' 68 | if self.score >= 60: 69 | return 'B' 70 | return 'C' 71 | class Person(object): 72 | def __init__(self, name, score): 73 | self.name = name 74 | self.score = score 75 | p1 = Person('Bob', 90) 76 | p1.get_grade = types.MethodType(fn_get_grade, p1, Person) 77 | print p1.get_grade() 78 | end: 79 | ### 类方法 80 | 通过标记一个 @classmethod,该方法将绑定到类上,而非类的实例。 81 | 类方法的第一个参数将传入类本身,通常将参数名命名为 cls 82 | 因为是在类上调用,而非实例上调用,因此类方法无法获得任何实例变量,只能获得类的引用。 83 | ### 继承 84 | class Person(object): 85 | def __init__(self, name, gender): 86 | self.name = name 87 | self.gender = gender 88 | class Student(Person): 89 | #定义Student类时,只需要把额外的属性加上,例如score: 90 | def __init__(self, name, gender, score): 91 | super(Student, self).__init__(name, gender) 92 | self.score = score 93 | 一定要用 super(Student, self).__init__(name, gender) 去初始化父类,否则, 94 | 继承自 Person 的 Student 将没有 name 和 gender。 95 | 函数super(Student, self)将返回当前类继承的父类,即 Person , 96 | 然后调用__init__()方法,注意self参数已在super()中传入,在__init__()中将隐式传递, 97 | 不需要写出(也不能写)。 98 | ### python中判断类型 99 | 函数isinstance()可以判断一个变量的类型,既可以用在Python内置的数据类型 100 | 如str、list、dict,也可以用在我们自定义的类,它们本质上都是数据类型。 101 | >>>p = Person('zhangsan','male') 102 | >>>isinstance(p, Person) 103 | True 104 | ### python中多态 105 | 类具有继承关系,并且子类类型可以向上转型看做父类类型 106 | 子类重写父类的方法,调用时首先调用子类的方法实现 107 | ### python中多重继承 108 | class A(B,C): 109 | pass 110 | 多重继承的目的是从两种继承树中分别选择并继承出子类,以便组合功能使用。 111 | ### python中获取对象信息 112 | type() 函数获取变量的类型 113 | dir() 函数获取变量的所有属性 114 | setattr(s, 'name', 'Adam') # 设置新的name属性 115 | getattr(s, 'age', 20) # 获取age属性,如果属性不存在,就返回默认值20 116 | # python的特殊方法 117 | ### 
\_\_str__()和 \_\_repr__() 118 | 如果要把一个类的实例变成 str,就需要实现特殊方法__str__(): 119 | 代码: 120 | def __str__(self): 121 |  return '(Person: %s)' % self.name 122 | __repr__ = __str__ #偷懒的定义__repr__函数 123 |    end: 124 | 因为 Python 定义了__str__()和__repr__()两种方法, 125 | __str__()用于显示给用户,而__repr__()用于显示给开发人员。 126 | ### \_\_cmp__() 127 | 对 int、str 等内置数据类型排序时,Python的 sorted() 按照默认的比较函数 cmp 排序, 128 | 但是,如果对一组 Student 类的实例排序时,就必须提供我们自己的特殊方法 __cmp__() 129 | 代码: 130 | def __cmp__(self, s): 131 | if self.name < s.name: 132 | return -1 133 | elif self.name > s.name: 134 | return 1 135 | else: 136 | return 0 137 | end: 138 | 使用:print sorted(person_list) 139 | 以分数排序: 140 |   def __cmp__(self, s): 141 | if self.score == s.score: 142 | return cmp(self.name, s.name) 143 | return -cmp(self.score, s.score) 144 | ### \_\_len__() 145 | 如果一个类表现得像一个list,要获取有多少个元素,就得用 len() 函数。 146 | 要让 len() 函数工作正常,类必须提供一个特殊方法__len__(),它返回元素的个数。 147 | ### \_\_slots__ 148 | 如果要限制添加的属性,例如,Student类只允许添加 name、gender和score 这3个属性, 149 | 就可以利用Python的一个特殊的__slots__来实现。 150 | __slots__的目的是限制当前类所能拥有的属性,如果不需要添加任意动态的属性, 151 | 使用__slots__也能节省内存 152 | 代码: 153 | class Student(object): 154 | __slots__ = ('name', 'gender', 'score') 155 | pass 156 | end: 157 | ### \_\_call__ 158 | 所有的函数都是可调用对象。 159 | 一个类实例也可以变成一个可调用对象,只需要实现一个特殊方法__call__()。 160 | class A(object): 161 | def __call__(self,s): 162 | return 'hello %s'%s 163 | >>>a=A 164 | >>>print a('jack') 165 | hello jack 166 | ### python中 @property 167 | @property表示可以将方法当作属性来使用 168 | 第一个score(self)是get方法,用@property装饰, 169 | 第二个score(self, score)是set方法,用@score.setter装饰 170 |      171 | 代码: 172 | @property 173 | def score(self): 174 | return self.__score 175 | @score.setter 176 | def score(self, score): 177 | if score < 0 or score > 100: 178 | raise ValueError('invalid score') 179 | self.__score = score 180 | end: 181 | 使用: 182 | obj.score = 90 #调用set方法 183 | print obj.score #调用get方法 184 | 185 | # 练习 186 | ```python 187 | def fib(num): 188 | """ 189 | 斐波那契数列 190 | """ 191 | a, b, L = 0, 1, 
class Rational(object):
    """
    Fraction ``p/q`` supporting ``+``, ``-``, ``*`` and ``/``.

    Operands are stored exactly as given; results of arithmetic are not
    reduced. Reduction to lowest terms happens only when the value is
    rendered by ``__str__``/``__repr__``, which use the module-level ``gcd``.
    """

    def __init__(self, p, q):
        """
        :param p: numerator
        :param q: denominator
        """
        self.p = p
        self.q = q

    def __add__(self, r):
        """Return ``self + r`` as a new, unreduced Rational."""
        return Rational(self.p * r.q + self.q * r.p, self.q * r.q)

    def __sub__(self, r):
        """Return ``self - r`` as a new, unreduced Rational."""
        return Rational(self.p * r.q - self.q * r.p, self.q * r.q)

    def __mul__(self, r):
        """Return ``self * r`` as a new, unreduced Rational."""
        return Rational(self.p * r.p, self.q * r.q)

    def __div__(self, r):
        """Return ``self / r`` (multiply by the reciprocal of ``r``)."""
        return Rational(self.p * r.q, self.q * r.p)

    # ``/`` is dispatched to __truediv__ on Python 3 and under
    # ``from __future__ import division``; without this alias the operator
    # raises TypeError there.
    __truediv__ = __div__

    def __str__(self):
        """Render the fraction in lowest terms, e.g. ``'3/4'``."""
        g = gcd(self.p, self.q)
        # Floor division keeps both parts integral even when ``/`` means
        # true division (otherwise the output would read '3.0/4.0').
        return '%s/%s' % (self.p // g, self.q // g)

    __repr__ = __str__
class Singleton(object):
    """
    Singleton implemented through ``__new__``.

    Every call to ``Singleton()`` returns the same object, so attributes
    set through one reference are visible through all others.
    """
    __instance = None
    # Serialises first-time creation so two threads cannot both build an instance.
    __lock = threading.Lock()

    def __init__(self):
        pass

    def __new__(cls, *args):
        """
        Return the shared instance, creating it on first use.

        :param args: accepted for signature compatibility; not forwarded,
                     because ``object.__new__`` rejects extra arguments
        :return: the single shared instance
        """
        # Compare with None rather than truthiness: a falsy instance
        # (e.g. a subclass defining __len__) must not be re-created.
        if Singleton.__instance is None:
            # ``with`` releases the lock only if it was actually acquired.
            with Singleton.__lock:
                # Re-check under the lock: another thread may have created
                # the instance between the first check and the acquire.
                if Singleton.__instance is None:
                    Singleton.__instance = object.__new__(cls)
        return Singleton.__instance
class ApplicationDICT(object):
    """
    Application-wide key/value registry exposed as a singleton.

    All data lives in one class-level dict; instance attribute assignment is
    disabled by the no-op ``__setattr__`` and ``__dict__`` is shadowed by a
    method, so ``set_maps`` is the intended way to store values.
    """
    __instance = None
    __maps = {}
    # Serialises first-time creation of the shared instance.
    __lock = threading.Lock()

    def __new__(cls, *args):
        """
        Return the shared instance, creating it on first use.

        :param args: accepted for signature compatibility; not forwarded,
                     because ``object.__new__`` rejects extra arguments
        """
        if ApplicationDICT.__instance is None:
            # ``with`` releases the lock only if it was actually acquired.
            with ApplicationDICT.__lock:
                # Re-check under the lock in case another thread won the race.
                if ApplicationDICT.__instance is None:
                    ApplicationDICT.__instance = object.__new__(cls)
        return ApplicationDICT.__instance

    @property
    def maps(self):
        """The shared registry dict (the live object, not a copy)."""
        return self.__maps

    def set_maps(self, k, v):
        """
        Store ``v`` under ``k`` in the shared registry.

        Both key and value must be truthy; an AssertionError is raised
        otherwise (note that asserts are skipped under ``python -O``).
        """
        assert k and v
        self.__maps[k] = v

    def __setattr__(self, key, value):
        # Deliberate no-op: silently ignores ``obj.attr = value``.
        pass

    def __dict__(self):
        # Shadows the normal ``__dict__`` attribute so that
        # ``obj.__dict__['x'] = 1`` cannot be used to bypass __setattr__.
        pass
def db_list(self):
    """
    List database names (mongo shell: ``show dbs``).

    :return: list of names, e.g. ['db1', 'db2']
    """
    client = self.mongodb_client
    # Newer PyMongo replaces database_names() with list_database_names().
    # The check is made on the class, not the instance: MongoClient resolves
    # unknown attribute names to Database objects, so an instance-level
    # getattr/hasattr would always "succeed".
    if hasattr(type(client), 'list_database_names'):
        return client.list_database_names()
    return client.database_names()
def document_delete(self, filter_, collation=None):
    """
    Delete the first document matching ``filter_``
    (mongo shell: ``db.tom_table.deleteOne({'seq': '_seq_7'})``).

    :param filter_: query document, e.g. {'name': 'jack'}
    :param collation: optional collation, passed through to ``delete_one``
    :return: True if a document was deleted, False if nothing matched
    """
    result = self.mongodb_collection.delete_one(filter_, collation)
    # The result attribute is ``deleted_count``; the previous spelling
    # ``delete_count`` does not exist and raised AttributeError on every call.
    return result.deleted_count > 0
collation=None): 203 | """ 204 | db.tom_table.deleteMany({'seq':'_seq_7'}) 205 | 206 | :param filter_: {'seq':'_seq_7'} 207 | :param collation: 208 | :return: 209 | """ 210 | self.mongodb_collection.delete_many(filter_, collation) 211 | return self.document_count(filter_) == 0 212 | -------------------------------------------------------------------------------- /contributed_modules/mysql/mysqldb_/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Time : 17-8-30 下午3:49 4 | # @Author : Tom.Lee 5 | # @CopyRight : 2016-2017 OpenBridge by yihecloud 6 | # @File : __init__.py.py 7 | # @Product : PyCharm 8 | # @Docs : 9 | # @Source : 10 | 11 | 12 | -------------------------------------------------------------------------------- /contributed_modules/mysql/mysqldb_/mysql_lock.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Time : 2017/8/25 22:42 4 | # @Author : Tom.lee 5 | # @Site : 6 | # @File : mysql_lock.py 7 | # @Software: PyCharm 8 | 9 | 10 | """ 11 | 通过MySQL实现分布式锁服务 12 | """ 13 | import MySQLdb 14 | import logging 15 | import time 16 | 17 | FORMAT_STR = '%(asctime)s -%(module)s:%(filename)s-L%(lineno)d-%(levelname)s: %(message)s' 18 | logger = logging.getLogger() 19 | logger.setLevel(logging.DEBUG) 20 | formatter = logging.Formatter(FORMAT_STR) 21 | handler = logging.StreamHandler() 22 | handler.setFormatter(formatter) 23 | logger.addHandler(handler) 24 | logging.info("Current log level is : %s", logging.getLevelName(logger.getEffectiveLevel())) 25 | 26 | 27 | class MySqlLock(object): 28 | LOCK_SQL = "SELECT get_lock('{key}', {timeout}) FROM dual" 29 | UNLOCK_SQL = "SELECT release_lock('{key}') FROM dual" 30 | 31 | def __init__(self, lock_key=None, *args, **kwargs): 32 | """ 33 | :param lock_key: 34 | :param args: 参数与MySQLdb初始化参数一致. 
def unlock(self):
    """
    Release the MySQL named lock identified by ``self.lock_key``.

    :return: True  - the lock was released
             False - the lock is held by another session and was not released
             None  - the lock does not exist, or the statement failed
    """
    uk = self._execute(self.UNLOCK_SQL.format(key=self.lock_key))
    status = uk[0]

    if status == 0:
        logging.debug("释放锁'%s'失败(该锁被其他进程持有)", self.lock_key)
        return False
    if status == 1:
        logging.debug("释放锁'%s'.", self.lock_key)
        return True
    if status is None:
        # RELEASE_LOCK() yields NULL when the named lock does not exist.
        logging.error("锁'%s'不存在.", self.lock_key)
    else:
        # Anything else is _execute()'s (-1,) sentinel: the statement itself
        # failed, which must not be reported as "lock does not exist".
        logging.error("释放锁'%s'失败!", self.lock_key)
    return None
113 | 114 | l.unlock() 115 | -------------------------------------------------------------------------------- /contributed_modules/mysql/mysqldb_/study_mysqldb.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Time : 17-3-27 下午4:29 4 | # @Author : Tom.Lee 5 | # @Description : mysql 操作 6 | # @File : study_mysql.py 7 | # @Product : PyCharm 8 | import MySQLdb 9 | import logging 10 | from contextlib import closing 11 | 12 | """ 13 | # # 1.创建数据库的连接 14 | # conn = MySQLdb.connect(host='localhost', port=3306, user='root', 15 | # passwd='root', db='iaasms_dev', ) 16 | # 17 | # # 2.创建游标 18 | # cur = conn.cursor(MySQLdb.cursors.DictCursor) 19 | # 20 | # # 3.通过游标cur 操作execute()方法可以写入纯sql语句对数据进行操作 21 | # sql = ''' 22 | # SELECT a.name AS snapshot_name, b.name AS volume_name 23 | # FROM snapshot a INNER JOIN volume b 24 | # ON a.volume_id=b.volume_id 25 | # ''' 26 | # count = cur.execute(sql) # 返回总条数 27 | # # result = cur.fetchmany(count) # 返回指定条目的结果集 28 | # result = cur.fetchall() 29 | # # 4.关闭游标 30 | # cur.close() 31 | # 32 | # # 5.提交事务,必须要有这个方法,否则数据不会被真正的插入。 33 | # conn.commit() 34 | # 35 | # # 6.关闭连接 36 | # conn.close() 37 | # 38 | # # ************打印*********** 39 | # print result 40 | 41 | # 一次插入多条记录,,返回值为受影响的行数。 42 | # sql="insert into student values(%s,%s,%s,%s)" 43 | # cur.executemany(sql,[ 44 | # ('3','Tom','1 year 1 class','6'), 45 | # ('3','Jack','2 year 1 class','7'), 46 | # ('3','rick','2 year 2 class','7'), 47 | # ]) 48 | 49 | # *******************close conn*************************** 50 | from contextlib import closing 51 | import MySQLdb 52 | 53 | ''' At the beginning you open a DB connection. 
Particular moment when 54 | you open connection depends from your approach: 55 | - it can be inside the same function where you work with cursors 56 | - in the class constructor 57 | - etc 58 | ''' 59 | db = MySQLdb.connect("host", "user", "pass", "database") 60 | with closing(db.cursor()) as cur: 61 | cur.execute("somestuff") 62 | results = cur.fetchall() 63 | # do stuff with results 64 | 65 | cur.execute("insert operation") 66 | # call commit if you do INSERT, UPDATE or DELETE operations 67 | db.commit() 68 | 69 | cur.execute("someotherstuff") 70 | results2 = cur.fetchone() 71 | # do stuff with results2 72 | 73 | # at some point when you decided that you do not need 74 | # the open connection anymore you close it 75 | db.close() 76 | 77 | """ 78 | 79 | # 创建名为MySQL的日志 80 | logger = logging.getLogger('MySQL') 81 | # 设置logger的level为DEBUG 82 | logger.setLevel(logging.DEBUG) 83 | # 创建一个输出日志到控制台的StreamHandler 84 | handler = logging.StreamHandler() 85 | formatter = logging.Formatter('[%(asctime)s] %(name)s:%(levelname)s: %(message)s') 86 | handler.setFormatter(formatter) 87 | # 给logger添加上handler 88 | logger.addHandler(handler) 89 | 90 | 91 | class _Closing(closing): 92 | def __exit__(self, *exc_info): 93 | if self.thing: 94 | self.thing.close() 95 | 96 | 97 | class MySQLUtils(object): 98 | def __init__(self, *args, **kwargs): 99 | """ 100 | :param args: 101 | :param kwargs: 102 | """ 103 | for k, v in kwargs.iteritems(): 104 | setattr(self, k, v) 105 | 106 | self.__args = args 107 | self.__kwargs = kwargs 108 | self.__connection = None 109 | self.__cursor = None 110 | 111 | def __enter__(self): 112 | """ 113 | 打开资源,支持with语法 114 | :return: MySQLUtils instance 115 | """ 116 | self.open() 117 | return self 118 | 119 | def __exit__(self, exc_type, exc_val, exc_tb): 120 | """ 121 | 关闭资源,支持with语法 122 | :param exc_type: 123 | :param exc_val: 124 | :param exc_tb: 125 | :return: 126 | """ 127 | self.close() 128 | if exc_tb: 129 | # 默认返回None, 返回None或False 发生异常交由外部调用程序捕获(建议) 130 | 
def select(self, sql, formatter_func=None):
    """
    Run a query and return its rows.

    :param sql: SQL text to execute
    :param formatter_func: optional callable applied to every row
    :return: the raw result of the query when no formatter is given,
             otherwise a list with one formatted entry per row
    """
    rows = self.__execute(sql)
    if formatter_func:
        # A list comprehension yields a real list on both Python 2 and 3;
        # map() would hand back a lazy iterator on Python 3.
        return [formatter_func(row) for row in rows]
    return rows
def formatter_datetime(dic):
    """
    Replace every ``datetime.datetime`` value of ``dic`` with ``str(value)``.

    The dict is modified in place and also returned, so the function can be
    used directly as a row formatter.

    :param dic: row dict, e.g. one row produced by a DictCursor
    :return: the same dict object
    """
    # items() exists on Python 2 and 3 (iteritems() is Python 2 only);
    # iterating a snapshot keeps the in-place updates safe.
    for k, v in list(dic.items()):
        if isinstance(v, datetime.datetime):
            dic[k] = str(v)
    return dic
class MySqlLock(object):
    """
    Distributed lock built on MySQL ``GET_LOCK`` / ``RELEASE_LOCK``,
    executed through a SQLAlchemy engine.

    NOTE(review): statements go through ``engine.execute``, so which pooled
    connection runs them is up to the engine - confirm that lock and unlock
    end up on the same MySQL session.
    """
    LOCK_SQL = "SELECT get_lock('{key}', {timeout}) FROM dual"
    UNLOCK_SQL = "SELECT release_lock('{key}') FROM dual"

    def __init__(self, lock_key=None, **kwargs):
        """
        :param lock_key: name of the lock; a fixed default key is used when omitted
        :param kwargs: connection settings
            user='test'
            pwd='test'
            host='localhost'   (default)
            port='3306'        (default)
            db_name='test'
        """
        # The port used to be read from the misspelled key 'pop', so a
        # caller-supplied 'port' was silently ignored. 'port' now wins and
        # 'pop' is still honoured for backward compatibility.
        legacy_port = kwargs.pop('pop', '3306')
        self.engine = create_engine('mysql+mysqldb://{user}:{pwd}@{host}:{port}/{db_name}?charset=utf8'.format(
            user=kwargs.pop('user', None),
            pwd=kwargs.pop('pwd', None),
            host=kwargs.pop('host', 'localhost'),
            port=kwargs.pop('port', legacy_port),
            db_name=kwargs.pop('db_name', None)
        ))
        self.lock_key = lock_key or '7ab18906739e4662ac01e69f5ebb7352'

    def _execute(self, sql):
        """
        Run a single-value SELECT and return that value.

        :param sql: SQL text to execute
        :return: first column of the first row (1, 0 or None for the lock
                 functions), or -1 when execution failed
        """
        res = -1
        try:
            # scalar() returns the function's result and closes the result
            # set. The previous code returned ``rowcount``, which is 1 for
            # any one-row SELECT, so a timed-out get_lock looked like success.
            res = self.engine.execute(sql).scalar()
        except Exception as ex:
            logging.error("执行SQL\"%s\" 失败! 异常信息: %s", sql, str(ex))
        return res

    def lock(self, timeout):
        """
        Try to acquire the lock.

        :param timeout: timeout value substituted into the get_lock() call
        :return: True  - lock acquired
                 False - not acquired (get_lock returned 0)
                 None  - error while acquiring
        """
        lk = self._execute(self.LOCK_SQL.format(key=self.lock_key, timeout=timeout))

        if lk == 0:
            logging.debug("锁'%s'已经被创建.", self.lock_key)
            return False
        elif lk == 1:
            logging.debug("创建锁'%s'.", self.lock_key)
            return True
        else:
            logging.error("获取锁失败!")
            return None

    def unlock(self):
        """
        Release the lock.

        :return: True  - lock released
                 False - lock is held by another session
                 None  - lock does not exist, or the statement failed
        """
        uk = self._execute(self.UNLOCK_SQL.format(key=self.lock_key))

        if uk == 0:
            logging.debug("释放锁'%s'失败(该锁被其他进程持有)", self.lock_key)
            return False
        elif uk == 1:
            logging.debug("释放锁'%s'.", self.lock_key)
            return True
        elif uk is None:
            # RELEASE_LOCK() yields NULL when the named lock does not exist.
            logging.error("锁'%s'不存在.", self.lock_key)
            return None
        else:
            # -1 sentinel from _execute(): the statement itself failed.
            logging.error("释放锁'%s'失败!", self.lock_key)
            return None
def _formatter_data(res):
    """
    Convert a ``sqlalchemy.engine.result.ResultProxy`` into a list of dicts.

    Uses the public ``keys()`` / ``fetchall()`` API instead of the private
    ``res.cursor._rows`` and ``res._metadata.keys`` attributes, which are
    driver and version specific. Note that the rows are consumed from
    ``res`` by this call.

    :param res: result of a statement that returns rows
    :return: list of ``{column_name: value}`` dicts, one per row
    """
    assert isinstance(res, ResultProxy)
    assert res.returns_rows
    columns = res.keys()
    return [dict(zip(columns, row)) for row in res.fetchall()]
class RedisHelper(object):
    """Small convenience wrapper around one ``redis.Redis`` connection."""

    def __init__(self, port=6379, host='127.0.0.1'):
        """
        :param port: redis server port
        :param host: redis server host
        """
        self.port = port
        self.host = host
        self.__conn = redis.Redis(host=self.host, port=self.port)

    def set(self, key, value):
        """Store ``value`` under ``key`` (key must be truthy); returns True."""
        assert key
        self.__conn.set(key, value)
        return True

    def get(self, key):
        """Return the value stored under ``key`` (key must be truthy)."""
        assert key
        return self.__conn.get(key)

    def keys(self, pattern='*'):
        """Return the keys matching ``pattern``."""
        return self.__conn.keys(pattern)

    def delete(self, *keys):
        """
        Delete one or more keys.

        :param keys: key names, passed as separate arguments
        :return: whatever the connection's ``delete`` returns
        """
        # Delegate to the connection. The previous body called
        # ``self.delete(keys)`` and therefore recursed until the
        # interpreter's recursion limit was hit.
        return self.__conn.delete(*keys)

    def subscribe(self, chanel):
        """
        Subscribe to a channel.

        :param chanel: channel name (must be truthy)
        :return: the pubsub object once the first response has been read,
                 otherwise None
        """
        assert chanel
        pub = self.__conn.pubsub()
        pub.subscribe(chanel)
        if pub.parse_response():  # first validate connection
            return pub
        return None

    def publish(self, chanel, message):
        """Publish ``message`` on ``chanel`` (both must be truthy); returns True."""
        assert chanel and message
        self.__conn.publish(chanel, message)
        return True
def delete_key(*keys):
    """
    Delete the given keys from redis.

    :param keys: key names, passed as separate arguments
    :return: None
    """
    if not keys:
        # Nothing to delete; avoid sending a DEL without arguments.
        return
    # Unpack the tuple: passing ``keys`` itself handed the client a single
    # tuple argument instead of the individual key names.
    redis_client.delete(*keys)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 17-4-23 11:14 PM
# @Author : tom.lee
# @Site :
# @File : __init__.py.py
# @Software: PyCharm
# --------------------------------------------------------------------------------
# /contributed_modules/requests/restful.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 17-9-2 11:32 AM
# @Author : Tom.Lee
# @CopyRight : 2016-2017 OpenBridge by yihecloud
# @File : restful.py
# @Product : PyCharm
# @Docs :
# @Source :

import json

import requests


def json_console_format(s):
    """Serialize ``s`` to a JSON string indented by 5 spaces for console output."""
    return json.dumps(s, indent=5)


class TestCase(object):
    """Small helper that issues HTTP requests and returns ``(status, body)``."""

    @classmethod
    def _response(cls, res):
        """
        Turn a response into ``(status_code, body)``.

        The body is the pretty-printed JSON when ``res.json()`` succeeds,
        otherwise the raw ``res.content``.
        """
        try:
            return res.status_code, json_console_format(res.json())
        except Exception:
            # Deliberate best effort: any failure to decode/format the body
            # falls back to the raw content.
            return res.status_code, res.content

    def _send(self, func, url, **kwargs):
        """Call the ``requests`` function ``func`` and normalise its response."""
        # Certificate verification stays off by default, but an explicit
        # ``verify`` from the caller no longer collides with a hard-coded one.
        kwargs.setdefault('verify', False)
        return self._response(func(url, **kwargs))

    def get(self, url, params=None, **kwargs):
        """Send a GET request; ``params`` is the query string mapping."""
        return self._send(requests.get, url, params=params, **kwargs)

    def post(self, url, data=None, body=None, **kwargs):
        """Send a POST request; ``data`` is form data, ``body`` is sent as JSON."""
        return self._send(requests.post, url, data=data, json=body, **kwargs)

    def put(self, url, data=None, body=None, **kwargs):
        """Send a PUT request; ``data`` is form data, ``body`` is sent as JSON."""
        return self._send(requests.put, url, data=data, json=body, **kwargs)

    def delete(self, url, **kwargs):
        """Send a DELETE request."""
        return self._send(requests.delete, url, **kwargs)

    def head(self, url, headers=None, **kwargs):
        """Send a HEAD request; a missing ``headers`` becomes an empty dict."""
        return self._send(requests.head, url, headers=headers or {}, **kwargs)

    def patch(self, url, data=None, body=None, **kwargs):
        """Send a PATCH request; ``data`` is form data, ``body`` is sent as JSON."""
        return self._send(requests.patch, url, data=data, json=body, **kwargs)

# --------------------------------------------------------------------------------
# /contributed_modules/requests/test.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 17-7-25 3:08 PM
# @Author : Tom.Lee
# @CopyRight : 2016-2017 OpenBridge by yihecloud
# @File : test.py
# @Product : PyCharm

from restful import TestCase


if __name__ == '__main__':
    t = TestCase()
    base_url = 'http://0.0.0.0:9091'

    # # get job list
    # print t.get(base_url + '/jobList')[1]
    # # get user list
    print(t.get(base_url + '/jobs')[1])
    # add user
    # data = {'name': 'node-16', 'status': 'AVAILABLE'}
    # print t.post(base_url + '/nodes', data=data)[1]

    # edit user
    # data = {'job_id': 'node01-tick', 'status': 'pause'}
    # print t.put(base_url + '/jobs', data=data)[1]

    # delete user
    # data = {'user_id': 5}
    # print t.delete(base_url + '/mailUsers', data=data)[1]

# --------------------------------------------------------------------------------
# /contributed_modules/requests/utils.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 17-8-11 11:29 AM
# @Author : Tom.Lee
# @CopyRight : 2016-2017 OpenBridge by yihecloud
# @File : utils.py
# @Product : PyCharm


import functools

import requests
from requests.packages.urllib3.exceptions import InsecureRequestWarning

requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
openSSL_error = "'X509' object has no attribute '_x509'"


def http_inspect(func):
    """
    Decorator that converts any failure of ``func`` into ``ConnectionError``.

    Timeouts, other ``requests`` errors and every remaining exception each
    get their own message; when the exception text equals ``openSSL_error``
    an upgrade hint for pyOpenSSL is printed first.
    """

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except requests.exceptions.Timeout:
            raise ConnectionError('连接超时')
        except requests.exceptions.RequestException:
            raise ConnectionError('请求失败')
        except Exception as e:
            # str(e) replaces the deprecated e.message attribute.
            if str(e) == openSSL_error:
                print("""package error, please execute :
    pip install -U pyOpenSSL""")
            # NOTE(review): the original indentation is lost in this dump;
            # the raise is assumed to apply to every exception, not only the
            # pyOpenSSL case.
            raise ConnectionError('连接失败')

    return wrapper


class ConnectionError(Exception):
    """
    Raised when connecting to the service fails.
    """
    pass


class RequestsUtils(object):
    """Thin wrapper around ``requests.get`` with shared headers and options."""

    user_agent = 'Mozilla/5.0 (X11; Linux x86_64) ' \
                 'AppleWebKit/537.36 (KHTML, like Gecko) ' \
                 'Ubuntu Chromium/50.0.2661.102 ' \
                 'Chrome/50.0.2661.102 Safari/537.36'

    def __init__(self, headers=None, cookies=None, timeout=3, proxies=None, verify=False):
        """
        :param headers: optional dict of request headers
        :param cookies: optional value for the ``Cookie`` header
        :param timeout: timeout passed to ``requests.get``
        :param proxies: proxies mapping passed to ``requests.get``
        :param verify: ``verify`` value passed to ``requests.get``
        """
        self.headers = headers
        self.cookies = cookies
        self.timeout = timeout
        self.proxies = proxies
        self.verify = verify

    def _make_headers(self):
        """Return the request headers, adding User-Agent and Cookie if needed."""
        # Work on a copy so the dict supplied by the caller is never mutated.
        headers = dict(self.headers or {})
        if not headers.get('User-Agent'):
            headers['User-Agent'] = self.user_agent
        if self.cookies:
            headers['Cookie'] = self.cookies
        return headers

    @http_inspect
    def get(self, url, params=None):
        """
        GET ``url`` with the configured headers, timeout, proxies and verify.

        :return: the ``requests`` response object
        :raises ConnectionError: on any failure (see ``http_inspect``)
        """
        return requests.get(url,
                            params=params,
                            headers=self._make_headers(),
                            timeout=self.timeout,
                            proxies=self.proxies,
                            verify=self.verify)


if __name__ == '__main__':
    req = RequestsUtils()
    print(req.get('http://192.168.1.111:8088/web'))

# --------------------------------------------------------------------------------
# /crawlers/spider/__init__.py:
# --------------------------------------------------------------------------------
"""
pip install beautifulsoup4
docs: https://www.crummy.com/software/BeautifulSoup/bs4/doc/index.zh.html#
"""
# --------------------------------------------------------------------------------
# /crawlers/spider/downloader.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 17-4-19 10:26 PM
# @Author : tom.lee
# @Site :
# @File : downloader.py
# @Software: PyCharm
import logging
import requests
from requests.packages.urllib3.exceptions import InsecureRequestWarning

requests.packages.urllib3.disable_warnings(InsecureRequestWarning)


# from .tools import Dir


class HtmlDownloader(object):
    """Download pages with ``requests.get`` using shared headers and options."""

    openSSL_error = "'X509' object has no attribute '_x509'"
    user_agent = 'Mozilla/5.0 (X11; Linux x86_64) ' \
                 'AppleWebKit/537.36 (KHTML, like Gecko) ' \
                 'Ubuntu Chromium/50.0.2661.102 ' \
                 'Chrome/50.0.2661.102 Safari/537.36'

    def __init__(self, headers=None, cookies=None, timeout=10, proxies=None, verify=False):
        """
        :param headers: optional dict of request headers
        :param cookies: optional value for the ``Cookie`` header
        :param timeout: timeout passed to ``requests.get``
        :param proxies: proxies mapping passed to ``requests.get``
        :param verify: ``verify`` value passed to ``requests.get``
        """
        self.headers = headers
        self.cookies = cookies
        self.timeout = timeout
        self.proxies = proxies
        self.verify = verify

    def _make_headers(self):
        """Return the request headers, adding User-Agent and Cookie if needed."""
        # Work on a copy so the dict supplied by the caller is never mutated.
        headers = dict(self.headers or {})
        if not headers.get('User-Agent'):
            headers['User-Agent'] = self.user_agent
        if self.cookies:
            headers['Cookie'] = self.cookies
        return headers

    def _request(self, url):
        """
        GET ``url`` once.

        :return: ``(status_code, content)`` on a completed request;
                 ``(502, None)`` on timeout and ``(500, None)`` on any other
                 failure (the failure is logged or, for the known pyOpenSSL
                 error text, an upgrade hint is printed)
        """
        try:
            resp = requests.get(
                url, headers=self._make_headers(), timeout=self.timeout,
                proxies=self.proxies, verify=self.verify)
            return resp.status_code, resp.content
        except requests.exceptions.Timeout:
            logging.error('requests timeout: %s' % url)
            return 502, None
        except requests.exceptions.RequestException:
            logging.error('requests RequestException: %s' % url)
            return 500, None
        except Exception as e:
            # str(e) replaces the deprecated e.message attribute.
            if str(e) == self.openSSL_error:
                print("""package error, please execute :
    pip install -U pyOpenSSL""")
            else:
                logging.error('requests unKnow error: %s' % url)
            return 500, None

    def download(self, url, retry=-1):
        """
        Download ``url`` and return its content.

        :param url: address to fetch
        :param retry: number of additional attempts after a non-200 result;
                      values <= 0 mean a single attempt
        :return: the response content when the status code is 200, else ''
        """
        code, content = self._request(url)
        if code == 200:
            return content
        if retry > 0:
            # Return the retry's result; previously it was computed and then
            # thrown away, so a successful retry still yielded ''.
            return self.download(url, retry - 1)
        return ''

# --------------------------------------------------------------------------------
# /crawlers/spider/main.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 17-4-21 12:36 AM
# @Author : tom.lee
# @Site :
# @File : main.py
# @Software: PyCharm
from downloader import HtmlDownloader
from parser import HtmlParser
from urlsmanager import URLSManager
from writer import FileWriter
from tools import Decorator
from proxypools import Proxy, ProxiesPool


class SpiderWorker(object):
    """Crawl pages starting at ``url`` and record ``[title] url`` lines."""

    def __init__(self, url, size=20):
        """
        :param url: start URL; also used as base URL and as host filter
        :param size: passed to ``URLSManager`` as its ``size`` limit
        """
        self.url = url
        self.pool = ProxiesPool()
        self.parser = HtmlParser(url)
        self.url_manager = URLSManager(url_pattern=url, size=size)
        self.writer = FileWriter()

    @Decorator.time
    def start(self):
        """Process pending URLs until none are left, then write the results."""
        self.url_manager.add_url(self.url)
        while self.url_manager.has_next():
            hd = HtmlDownloader(proxies=self.pool.get_proxy_ip())
            url = self.url_manager.get_url()
            data = hd.download(url)
            urls = self.parser.simple_tags(data, 'a', attributes=['href'])
            self.url_manager.add_urls([url_.get('href') for url_ in urls])
            title = self.parser.element(data, 'title')
            title = title.getText() if title else 'unknown'
            self.writer.load_data('[%s] %s' % (title, url))
        # NOTE(review): the original indentation is lost in this dump; the
        # final write is assumed to happen once, after the loop.
        self.writer.writer()


p = Proxy()
p.start()
SpiderWorker('http://www.jikexueyuan.com/').start()
# NOTE(review): Proxy.run loops while is_running and exit() is never called
# here, so this join looks like it blocks indefinitely -- confirm intent.
p.join()
# --------------------------------------------------------------------------------
# /crawlers/spider/parser.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 17-4-19 10:10 PM
# @Author : tom.lee
# @Site : parser
# @File : parser.py
# @Software: PyCharm



import urlparse

import bs4


class HtmlParser(object):
    """
    Web page parser; subclass it to implement more complex behaviour.
    """
    url_filed = 'href'
    parser = 'html.parser'
    encoding = 'utf-8'

    def __init__(self, base_url=None):
        """:param base_url: when set, URL attributes are joined against it."""
        self.__base_url = base_url

    def simple_tags(self, data, tag=None, patterns=None, attributes=None):
        """
        Parse a single kind of tag and return its attribute dicts.
        """
        tags = self.__parser_tags(data, tag, patterns)
        return self.__tags(tags, attributes)

    def multilevel_tags(self, data, multilevel_patterns=None, attributes=None):
        """
        Parse nested tags.

        ``multilevel_patterns`` is a list of one-item dicts ``{tag: patterns}``
        applied in order, each one searching inside the previous result.
        Returns ``data`` unchanged when no patterns are given.
        """
        if not multilevel_patterns:
            return data

        # Apply the levels in order without touching the caller's list
        # (the list used to be emptied while it was being iterated).
        for tag_patterns in multilevel_patterns:
            tag, patterns = list(tag_patterns.items())[0]
            data = self.__parser_tags(data, tag, patterns)

        return self.__tags(data, attributes)

    def element(self, data, tag=None, patterns=None):
        """
        Return the first tag element that matches, or None.
        """
        elements = self.elements(data, tag, patterns)
        return elements[0] if elements else None

    def elements(self, data, tag=None, patterns=None):
        """Return all tag elements that match ``tag`` / ``patterns``."""
        return self.__parser_tags(data, tag, patterns)

    def __tags(self, data, attributes=None):
        """
        Convert tag elements to dicts of their attributes plus a ``text`` key.

        When ``attributes`` is given, only those keys are kept and, if a base
        URL is configured, the URL attribute is made absolute.
        """
        tags = [dict(tag_.attrs, text='%s'.encode(self.encoding) % tag_.getText())
                for tag_ in data]

        if not attributes:
            return tags

        # Build filtered dicts instead of popping keys while iterating.
        tags = [dict((k, v) for k, v in tag_attr.items() if k in attributes)
                for tag_attr in tags]

        if self.__base_url:
            return self.__format_url(tags)

        return tags

    def __parser_tags(self, data, tag=None, patterns=None):
        """Parse ``data`` and return ``find_all(tag, patterns)``."""
        return self.__data_parser(data).find_all(tag, patterns)

    def __data_parser(self, data):
        """Build a BeautifulSoup object from ``str(data)``."""
        return bs4.BeautifulSoup(str(data), self.parser, from_encoding=self.encoding)

    def __format_url(self, maps):
        """Join every non-empty URL attribute in ``maps`` with the base URL."""
        for m in maps:
            if not m.get(self.url_filed):
                continue
            m[self.url_filed] = urlparse.urljoin(self.__base_url, m.get(self.url_filed))
        return maps

# --------------------------------------------------------------------------------
# /crawlers/spider/proxypools.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 17-4-22 11:04 PM
# @Author : tom.lee
# @Site : proxy pool
# @File : proxyspools.py
# @Software: PyCharm
import logging
import re
import threading
import time
import random

from tools import ReThread
from downloader import HtmlDownloader
from parser import HtmlParser

lock = threading.Lock()


class ProxiesPool(object):
    """Process-wide pool of proxy dicts; every call returns the same instance."""

    __instance = None
    __pool = []

    def __new__(cls, *args):
        """Create the shared instance on first use and return it afterwards."""
        if not ProxiesPool.__instance:
            # Check again under the lock so only one thread creates it.
            with lock:
                if not ProxiesPool.__instance:
                    try:
                        # object.__new__ takes no extra arguments.
                        ProxiesPool.__instance = object.__new__(cls)
                    except Exception as e:
                        logging.error('ProxiesPool: init error : %s' % e)
        return ProxiesPool.__instance

    @property
    def pool(self):
        """The shared list of proxies."""
        return self.__pool

    def add(self, ip):
        """Append ``ip`` to the pool; falsy values are ignored."""
        if not ip:
            return
        self.__pool.append(ip)

    def get_proxy_ip(self):
        """Print and return a random proxy from the pool, or None if empty."""
        proxies = random.choice(self.pool) if self.pool else None
        print(proxies)
        return proxies

    def __setattr__(self, key, value):
        # Instance attribute assignment is deliberately a no-op.
        pass

    def __dict__(self):
        pass


class Proxy(ReThread):
    """Thread that refreshes the proxy pool every 15 minutes."""

    proxy_site = 'http://www.xicidaili.com/nn'

    def run(self):
        """Refresh the pool, sleep 15 minutes, repeat while running."""
        while self.is_running:
            self.__update_proxy_pool()
            time.sleep(60 * 15)

    @staticmethod
    def __re_number(s):
        """Strip every char except digits, '+', '.', '$' and return a float
        rounded to two decimals; falsy input gives 0."""
        if not s:
            return 0
        return float('%0.2f' % float(re.sub(r'[^\d+.\d+$]', '', s)))

    def __update_proxy_pool(self):
        """Download the proxy list page and add the fast proxies to the pool."""
        downloader = HtmlDownloader()
        proxy_pool = ProxiesPool()
        parser = HtmlParser()
        data = downloader.download(self.proxy_site)
        speed_times = parser.multilevel_tags(data, [{'tr': None}, {'div': {'class': 'bar'}}])
        # The first row is skipped (presumably the table header -- TODO confirm).
        ip_data = parser.elements(data, 'tr')[1:]
        # The bar divs alternate: even positions are read as speed, odd as time.
        speed = speed_times[::2]
        times = speed_times[1::2]
        for i, row in enumerate(ip_data):
            # The first two non-empty text lines of a row are ip and port.
            fields = [value for value in row.text.split('\n') if value]
            if len(fields) < 2:
                continue
            if self.__re_number(speed[i].get('title')) > 1 \
                    or self.__re_number(times[i].get('title')) > 1:
                continue

            proxy_pool.add({'http': '%s:%s' % (fields[0], fields[1])})

# --------------------------------------------------------------------------------
# /crawlers/spider/tools.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 17-4-19 10:49 PM
# @Author : tom.lee
# @Site :
# @File : tools.py
# @Software: PyCharm
import functools
import logging
import os
import threading
import time


class Decorator(object):
    @staticmethod
    def time(func):
        """Decorator that logs how many seconds each call of ``func`` took."""

        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            # ``time`` here is the module: class-level names are not visible
            # from inside nested functions.
            start = time.time()
            result = func(*args, **kwargs)
            logging.warning(
                '*******方法[%s]消耗时间%d s' %
                (func.__name__, time.time() - start))
            return result

        return wrapper


class Constants(object):
    parser = 'html.parser'
    encoding = 'utf-8'
    url_filed = 'href'


class Dir(object):
    """Directory and file helpers that print a message instead of raising."""

    @staticmethod
    def create_dir(path):
        """Create ``path`` (with parents); print a message if it exists or fails."""
        if not os.path.exists(path):
            try:
                os.makedirs(path)
            except Exception as e:
                print(u'文件夹%s 创建失败;\n %s' % (path, e))
        else:
            print(u'文件夹%s 已经存在' % path)

    @staticmethod
    def parent_dir(path):
        """Return ``path`` without its last '/'-separated component."""
        path = path.rstrip('/')
        return '/'.join(path.split('/')[0:-1])

    @staticmethod
    def del_dir(path):
        """Delete the directory ``path`` together with everything below it."""
        assert os.path.exists(path) and os.path.isdir(path)
        # Bottom-up walk so directories are already empty when removed.
        for root, dirs, files in os.walk(path, topdown=False):
            for name in files:
                os.remove(os.path.join(root, name))
            for name in dirs:
                os.rmdir(os.path.join(root, name))
        os.rmdir(path)

    @staticmethod
    def create_file(name, mode='w', data=""):
        """
        Write ``data`` to the file ``name``, creating its parent directory.

        :param name: path of the file
        :param mode: mode passed to ``open``; the default is now 'w' because
                     the former default 'r' could neither create nor write
        :param data: content to write
        """
        try:
            parent_path = Dir.parent_dir(name)
            if parent_path and not os.path.exists(parent_path):
                Dir.create_dir(parent_path)
            with open(name, mode) as f:
                f.write(data)
        except Exception as e:
            print(u'%s 创建失败\n异常:%s' % (name, e))


class ReThread(threading.Thread):
    """Thread base class carrying a pause flag and a running flag."""

    def __init__(self, *args, **kwargs):
        super(ReThread, self).__init__(*args, **kwargs)
        self.__flag = threading.Event()  # flag used to pause the thread
        self.__flag.set()  # set to True
        self.__running = threading.Event()  # flag used to stop the thread
        self.__running.set()  # set running to True

    @property
    def is_running(self):
        """
        Return the running flag.
        :return: True/False
        """
        return self.__running.is_set()

    def run(self):
        """
        Override with a ``while`` loop that checks ``self.is_running``.
        """
        pass

    def stop(self):
        """
        Clear the pause flag. Nothing in this class waits on the flag, so a
        subclass's ``run`` has to do that for the pause to take effect.
        """
        self.__flag.clear()

    def resume(self):
        """
        Set the pause flag again.
        """
        self.__flag.set()

    def exit(self):
        """
        Set the pause flag and clear the running flag.
        """
        self.__flag.set()
        self.__running.clear()

# --------------------------------------------------------------------------------
# /crawlers/spider/urlsmanager.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 17-4-19 10:18 PM
# @Author : tom.lee
# @Site :
# @File : urlsmanager.py
# @Software: PyCharm
import urlparse


class URLSManager(object):
    """Keep track of pending and already processed URLs."""

    def __init__(self, url_pattern=None, size=None):
        """
        :param url_pattern: when set, only URLs with the same netloc are accepted
        :param size: when set, no URL is accepted once this many were processed
        """
        self.url_pattern = url_pattern
        self.size = size
        self.pending_urls = set()
        self.processed_urls = set()

    def has_next(self):
        """Return True while there are pending URLs."""
        return len(self.pending_urls) > 0

    def add_url(self, url):
        """Queue ``url`` unless it is empty, foreign, known, or over the limit."""
        if not url:
            return
        url = url.rstrip('/')
        if self.url_pattern and urlparse.urlparse(
                self.url_pattern).netloc != urlparse.urlparse(url).netloc:
            return
        # Two set lookups instead of building a union set on every call.
        if url in self.pending_urls or url in self.processed_urls:
            return
        if self.size and len(self.processed_urls) >= self.size:
            # Limit reached: drop everything still pending.
            self.pending_urls = set()
            return
        self.pending_urls.add(url)

    def add_urls(self, urls):
        """Queue every URL of ``urls`` via ``add_url``."""
        if not urls:
            return
        for url in urls:
            self.add_url(url)

    def get_url(self):
        """Pop a pending URL, mark it processed and return it."""
        url = self.pending_urls.pop()
        self.processed_urls.add(url)
        return url

# --------------------------------------------------------------------------------
# /crawlers/spider/writer.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 17-4-21 12:17 AM
# @Author : tom.lee
# @Site :
# @File : writer.py
# @Software: PyCharm
import sys

# Python 2 only: force the default string encoding to utf8.
reload(sys)
sys.setdefaultencoding('utf8')


class FileWriter(object):
    """Collect data items in memory and write them to a file."""

    def __init__(self, file_name=None):
        """:param file_name: target file; defaults to 'data.txt'."""
        self.file_name = file_name or 'data.txt'
        self._data = []

    def load_data(self, data):
        """Remember ``data`` for the next write; falsy values are ignored."""
        if not data:
            return
        self._data.append(data)

    def writer(self):
        """Write every collected item followed by a blank line to the file."""
        # The with-block closes the file even when a write fails.
        with open(self.file_name, 'wb+') as f:
            for d in self._data:
                f.write('%s\n\n' % d)

# --------------------------------------------------------------------------------
# /data_analysis/__init__.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2017/7/23 12:09
# @Author : Tom.lee
#
@Site : 6 | # @File : __init__.py.py 7 | # @Software: PyCharm 8 | 9 | -------------------------------------------------------------------------------- /data_analysis/academic_concept/matrix_product.md: -------------------------------------------------------------------------------- 1 | # 矩阵乘法 2 | > 矩阵相乘最重要的方法是一般矩阵乘积。 3 | 它只有在第一个矩阵的列数(column)和第二个矩阵的行数(row)**相同**时才有意义。 4 | 一般单指矩阵乘积时,指的便是一般矩阵乘积。一个m×n的矩阵就是m×n个数排成m行n列的一个数阵。 5 | 由于它把许多数据紧凑的集中到了一起,所以有时候可以简便地表示一些复杂的模型。 6 | 7 | ## 定义 8 | > 设A为 `m*p` 的矩阵,B为 `p*n` 的矩阵,那么称 `m*n` 的矩阵C为矩阵A与B的乘积, 9 | 记作 `C=AB` ,其中矩阵C中的第 `i` 行第 `j` 列元素可以表示为: 10 | $$c_{ij} = \sum_{k=1}^{p} a_{ik} b_{kj} = a_{i1} b_{1j} + a_{i2} b_{2j} + \cdots + a_{ip} b_{pj}$$ 11 | -------------------------------------------------------------------------------- /data_analysis/study_matplotlib/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Time : 2017/7/24 21:46 4 | # @Author : Tom.lee 5 | # @Site : 6 | # @File : __init__.py.py 7 | # @Software: PyCharm 8 | 9 | -------------------------------------------------------------------------------- /data_analysis/study_matplotlib/graphs/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Time : 2017/7/30 20:47 4 | # @Author : Tom.lee 5 | # @Site : 6 | # @File : __init__.py.py 7 | # @Software: PyCharm 8 | 9 | -------------------------------------------------------------------------------- /data_analysis/study_matplotlib/graphs/graphs_histogram.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Time : 2017/7/30 20:49 4 | # @Author : Tom.lee 5 | # @Site : 6 | # @File : graphs_histogram.py 7 | # @Software: PyCharm 8 | 9 | """ 10 | 直方图 11 | """ 12 | import matplotlib.pyplot as plt 13 | import numpy as np 14 | 15 | N = 5 16 | # 男生分数 17 | menMeans = (20, 35, 30, 35, 27) 18 | # 女生分数 19 | womenMeans = (25, 32, 34, 20, 25) 20 | 
menStd = (2, 3, 4, 1, 2) 21 | womenStd = (3, 5, 2, 3, 3) 22 | ind = np.arange(N) # the x locations for the groups 23 | width = 0.35 # the width of the bars: can also be len(x) sequence 24 | 25 | p1 = plt.bar(ind, menMeans, width, color='#d62728', yerr=menStd) 26 | p2 = plt.bar(ind, womenMeans, width, bottom=menMeans, yerr=womenStd) 27 | 28 | plt.ylabel('Scores') 29 | plt.title('Scores by group and gender') 30 | plt.xticks(ind, ('G1', 'G2', 'G3', 'G4', 'G5')) # 设置x轴刻度 31 | plt.yticks(np.arange(0, 81, 5)) # 设置y轴刻度 32 | plt.legend((p1[0], p2[0]), ('Men', 'Women')) 33 | plt.savefig("../save_file/graphs_histogram.png") 34 | plt.show() 35 | -------------------------------------------------------------------------------- /data_analysis/study_matplotlib/graphs/graphs_quadratic.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Time : 2017/7/30 1:38 4 | # @Author : Tom.lee 5 | # @File : graphs_quadratic.py 6 | # @Software: PyCharm 7 | 8 | 9 | """ 10 | numpy 多项式 与 matplotlib 二次函数折线图 11 | """ 12 | 13 | import warnings 14 | 15 | import matplotlib.pyplot as plt 16 | import numpy as np 17 | 18 | # 如果最小二乘拟合中的矩阵是秩不足,则引发警告。只有在满 == False时,才会发出警告。 19 | # 警告可以通过以下方式关闭: 20 | warnings.simplefilter('ignore', RuntimeWarning) 21 | warnings.simplefilter('ignore', np.RankWarning) 22 | 23 | 24 | def foo(x0): 25 | # 由图可知,函数分3段,周期函数 26 | # 设:y= kx + b ,且b=0 27 | c0, hc, c = 0.4, 1.0, 0.6 28 | if x0 > 1: 29 | x0 = float(x0) % 1.0 30 | if x <= c0: 31 | k = hc / c0 32 | return k * x0 33 | elif x < c: 34 | k = (hc - 0) / (c0 - c) 35 | return k * (x0 - c) 36 | else: 37 | return 0 38 | 39 | 40 | # x 的取值 0到2 范围取50个点 41 | x = np.linspace(0, 2, 50, dtype=np.float64) 42 | 43 | # 计算对应的y值,并转换为nparray 对象 44 | y = np.array(map(foo, x)).astype(np.float64) 45 | 46 | print "x值:", x 47 | print "y值:", y 48 | 49 | m = np.polyfit(x, y, 20) # 调整拟合多项式的度为20,生成多项式参数 50 | print "多项式参数:", m 51 | 52 | 53 | # 一维多项式 54 | p1 = 
np.poly1d(m) # 根据多项式参数构造一维多项式 55 | print "一维多项式:", p1 56 | 57 | # 根据x使用多项式求解y值, 58 | # yp=np.polyval(np.polyfit(x, y, 20),x),x可以是单个值也可以是列表 59 | y1 = p1(x) 60 | print "一维多项式根据x的计算值:", y1 61 | 62 | # 以点(“.”)绘制实际值折线 63 | plot1 = plt.plot(x, y, '.', label='original values') 64 | # 以线(“r”)绘制最小二乘拟合折线 65 | plot2 = plt.plot(x, y1, 'r', label='polyfit values') 66 | 67 | # x轴描述 68 | plt.xlabel('X') 69 | # Y轴描述 70 | plt.ylabel('Y') 71 | # plt.legend(loc=1) # 指定legend的位 72 | # 标题 73 | plt.title('y = kx (0>> import numpy as np 13 | Examples 14 | -------- 15 | 16 | >>> np.random.rand(3,2) 17 | array([[ 0.14022471, 0.96360618], #random 18 | [ 0.37601032, 0.25528411], #random 19 | [ 0.49313049, 0.94909878]]) #random 20 | 21 | 22 | >>> np.dot(3, 4) 23 | 12 24 | 25 | Neither argument is complex-conjugated: 26 | 27 | >>> np.dot([2j, 3j], [2j, 3j]) 28 | (-13+0j) 29 | 30 | For 2-D arrays it is the matrix product: 31 | 32 | >>> a = [[1, 0], [0, 1]] 33 | >>> b = [[4, 1], [2, 2]] 34 | >>> np.dot(a, b) 35 | array([[4, 1], 36 | [2, 2]]) 37 | 38 | >>> a = np.arange(3*4*5*6).reshape((3,4,5,6)) 39 | >>> b = np.arange(3*4*5*6)[::-1].reshape((5,4,6,3)) 40 | >>> np.dot(a, b)[2,3,2,1,2,2] 41 | 499128 42 | >>> sum(a[2,3,2,:] * b[1,2,:,2]) 43 | 499128 44 | 45 | """ 46 | 47 | # ############################### 一维 ############################### 48 | """ 49 | 参数个数相同: 50 | """ 51 | 52 | print np.dot(3, 4) # 3*4 -> 12 53 | print np.dot([1, 2, 3], [4, 5, 6]) # 1 * 4 + 2 * 5 + 3 * 6 -> 32 54 | 55 | """ 56 | 参数列表不同(短的参数元素个数只能为1,且不能为列表[]类型): 57 | 如: 58 | >>> np.dot([1, 2, 3], [4, 5]) 59 | ValueError: shapes (3,) and (2,) not aligned: 3 (dim 0) != 2 (dim 0) 60 | 61 | >>> np.dot([1, 2, 3], [4])  62 | ValueError: shapes (3,) and (1,) not aligned: 3 (dim 0) != 1 (dim 0) 63 | 64 | >>> np.dot([1, 2, 3], 4) 65 | [ 4 8 12] 66 | 67 | """ 68 | print np.dot([1, 2, 3], 4) # [1*4,2*4,3*4] -> [ 4 8 12] 69 | 70 | 71 | # ############################### 二维 ############################### 72 | """ 73 | 参数个数相同: 74 | 75 | 计算过程: 
76 | 77 | 第一轮: 78 | 1. A中取第一个元素[x1, y1] 79 | B中取各个元素中的第一个值[m1, m2] 80 | 矩阵相乘-> x1*m1+y1*m2 81 | 82 | 2. A中取第一个元素[x1, y1] 83 | B中取各个元素中的第二个值[n1, n2] 84 | 矩阵相乘-> x1*n1+y1*n2 85 | --> [[ 77 110]] 86 | 第二轮: 87 | 1. A中取第二个元素[x2, y2] 88 | B中取各个元素中的第一个值[m1, m2] 89 | 矩阵相乘-> x2*m1+y2*m2 90 | 91 | 2. A中取第二个元素[x2, y2] 92 | B中取各个元素中的第二个值[n1, n2] 93 | 矩阵相乘-> x2*n1+y2*n2 94 | --> [[ 77 110] [165 242]] 95 | 96 | 97 | """ 98 | 99 | x1, y1 = 1, 2 100 | x2, y2 = 3, 4 101 | 102 | m1, n1 = 11, 22 103 | m2, n2 = 33, 44 104 | 105 | A = [[x1, y1], [x2, y2]] # 行 106 | B = [[m1, n1], [m2, n2]] # 列 107 | 108 | print np.dot(A, B) 109 | # [[ 77 110] 110 | # [165 242]] 111 | 112 | print '测试计算过程:' 113 | print x1 * m1 + y1 * m2, x1 * n1 + y1 * n2 # 77 110 114 | print x2 * m1 + y2 * m2, x2 * n1 + y2 * n2 # 165 242 115 | 116 | 117 | def my_dot_w2(a, b): 118 | # 判断是否为列表 119 | if isinstance(a, list) and isinstance(b, list): 120 | assert len(a) == len(b) 121 | l1, l2 = a, b 122 | result = [] 123 | 124 | if isinstance(l1[0], list): # 判断是否为多维数组 125 | size = len(l1) 126 | for index, value in enumerate(l1): 127 | start, cell = 0, [] 128 | 129 | while start < size: 130 | cell.append(my_dot_w2(value, map(lambda x: x[start], l2))) 131 | start += 1 132 | 133 | result.append(cell) 134 | return result 135 | 136 | else: # 一维数组 137 | return sum(map(lambda j: l1[j] * l2[j], xrange(len(l1)))) 138 | 139 | # 以下为数字与数组的矩阵算法,找出集合 140 | elif isinstance(a, list) and isinstance(b, int): 141 | return map(lambda x: x * b, a) 142 | 143 | elif isinstance(b, list) and isinstance(a, int): 144 | return map(lambda x: x * a, b) 145 | 146 | # 都为数字的算法 147 | elif isinstance(a, int) and isinstance(b, int): 148 | return a * b 149 | 150 | # 其他情况抛出异常 151 | else: 152 | raise Exception('params must be "list or int"!') 153 | 154 | 155 | print '**' * 50 156 | print my_dot_w2([1, 2], 3) # 1*3,2*3 = [3, 6] 157 | print np.dot([1, 2], 3) 158 | 159 | print my_dot_w2(3, [1, 2]) # 3*1,3*2 = [3, 6] 160 | print np.dot(3, [1, 2]) 161 | 162 | print 
my_dot_w2([1, 2], [3, 4]) # 1*3+2*4 = 11 163 | print np.dot([1, 2], [3, 4]) 164 | 165 | print my_dot_w2(A, B) 166 | print np.dot(A, B) 167 | 168 | 169 | 170 | 171 | 172 | -------------------------------------------------------------------------------- /data_analysis/study_numpy/numpy_functions/np_mgrid_ogrid.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Time : 17-9-7 下午3:15 4 | # @Author : Tom.Lee 5 | # @CopyRight : 2016-2017 OpenBridge by yihecloud 6 | # @File : np_mgrid_ogrid.py 7 | # @Product : PyCharm 8 | # @Docs : 9 | # @Source : 10 | 11 | 12 | # #创建网格索引 13 | """ 14 | >>> import numpy as np 15 | 16 | 17 | # 密集网格np.mgrid 18 | >>> mgrid = np.lib.index_tricks.nd_grid() 19 | >>> mgrid[0:5,0:5] 20 | array([[[0, 0, 0, 0, 0], 21 | [1, 1, 1, 1, 1], 22 | [2, 2, 2, 2, 2], 23 | [3, 3, 3, 3, 3], 24 | [4, 4, 4, 4, 4]], 25 | [[0, 1, 2, 3, 4], 26 | [0, 1, 2, 3, 4], 27 | [0, 1, 2, 3, 4], 28 | [0, 1, 2, 3, 4], 29 | [0, 1, 2, 3, 4]]]) 30 | >>> mgrid[-1:1:5j] 31 | array([-1. , -0.5, 0. , 0.5, 1. 
]) 32 | 33 | 34 | # 稀疏网格np.ogrid 35 | >>> ogrid = np.lib.index_tricks.nd_grid(sparse=True) 36 | >>> ogrid[0:5,0:5] 37 | [array([[0], 38 | [1], 39 | [2], 40 | [3], 41 | [4]]), array([[0, 1, 2, 3, 4]])] 42 | 43 | 44 | """ 45 | -------------------------------------------------------------------------------- /data_analysis/study_numpy/numpy_functions/np_random.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Time : 17-9-7 下午3:12 4 | # @Author : Tom.Lee 5 | # @CopyRight : 2016-2017 OpenBridge by yihecloud 6 | # @File : np_random.py 7 | # @Product : PyCharm 8 | # @Docs : 9 | # @Source : 10 | 11 | """ 12 | >>> import numpy as np 13 | 14 | 15 | # 创建随机二维数组 16 | >>> np.random.rand(3,2) 17 | array([[ 0.14022471, 0.96360618], #random 18 | [ 0.37601032, 0.25528411], #random 19 | [ 0.49313049, 0.94909878]]) #random 20 | 21 | """ 22 | -------------------------------------------------------------------------------- /data_analysis/study_numpy/numpy_multidimensional.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Time : 2017/7/23 13:12 4 | # @Author : Tom.lee 5 | # @Site : 6 | # @File : numpy_list_multidimensional.py 7 | # @Software: PyCharm 8 | 9 | """ 10 | numpy 多维数组 11 | """ 12 | import numpy as np 13 | 14 | 15 | def split_line(): 16 | print '*' * 6 ** 2 17 | 18 | 19 | a = np.arange(10, 0, -1) 20 | print a 21 | b = np.arange(100, 200, 10) 22 | print b 23 | split_line() 24 | 25 | 26 | def multi_2(): 27 | """ 28 | 构建 x,y 二维数组 29 | 30 | reshape(纵轴高度,横轴高度) 31 | :return: 32 | """ 33 | y = a.reshape(-1, 1) # 表示y轴 34 | x = b # 表示x轴 35 | xy = y + x # 表示y轴元素与x序列每个元素想加 36 | print xy 37 | 38 | print '下标(5,5):', xy[5, 5] 39 | print xy.shape 40 | split_line() 41 | 42 | 43 | def multi_2_func(): 44 | """ 45 | 使用函数创建2维数组 46 | :return: 47 | """ 48 | print np.fromfunction(lambda x, y: (x + 1) * y, (10, 5)) 
49 | split_line() 50 | 51 | 52 | def sin(): 53 | """ 54 | 正弦函数 55 | :return: 56 | """ 57 | x = np.linspace(0, 2 * np.pi, 10) 58 | 59 | # 使用np.sin(x)对 每个x中的元素求正弦值,x值不变 60 | y = np.sin(x) 61 | print x, '\n', y 62 | split_line() 63 | 64 | # 使用np.sin(x,x) 对每个x中的元素求正弦值,并赋值给x, 即x,z 共享内存空间 65 | z = np.sin(x, x) 66 | print x, '\n', z 67 | split_line() 68 | 69 | 70 | if __name__ == '__main__': 71 | # multi_2() 72 | # multi_2_func() 73 | sin() 74 | -------------------------------------------------------------------------------- /data_analysis/study_numpy/numpy_ndarray.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Time : 17-7-22 上午12:41 4 | # @Author : tom.lee 5 | # @File : study_numpy.py 6 | # @Software: PyCharm 7 | 8 | """ 9 | numpy 10 | Numpy是Python的一个科学计算的库,提供了矩阵运算的功能 11 | """ 12 | 13 | import numpy as np 14 | 15 | 16 | def split_line(): 17 | print '*' * 6 ** 2 18 | 19 | 20 | def np_version(): 21 | """ 22 | 版本 23 | :return: 24 | """ 25 | print np.version.version 26 | 27 | 28 | def np_list(): 29 | """ 30 | numpy 数组 : 31 | 32 | 只能存储一种数据结构, 33 | 使用 "numpy.array()"来创建, 34 | 使用" dtype = numpy.类型" 来显示指定 35 | 36 | :return: 37 | """ 38 | # 创建 39 | l = np.array([1, 2, 3], dtype=np.int8) 40 | a = np.array([1, 2, 3, 4]) 41 | b = np.array((5, 6, 7, 8)) 42 | c = np.array([[1, 2, 3, 4], [4, 5, 6, 7], [7, 8, 9, 10]]) 43 | print 'l:', l 44 | print 'a:', a 45 | print 'b:', b 46 | print 'c:', c 47 | split_line() 48 | 49 | # 类型 50 | print l.dtype, c.dtype 51 | split_line() 52 | 53 | # 大小: 数组a的shape只有一个元素,因此它是一维数组。 54 | # 而数组c的shape有两个元素,因此它是二维数组,其中第0轴的长度为3,第1轴的长度为4 55 | print l.shape, c.shape 56 | split_line() 57 | 58 | # 改变数组每个轴的长度 : 只是改变每个轴的大小,数组元素在内存中的位置并没有改变 59 | c.shape = 4, 3 60 | print c 61 | split_line() 62 | 63 | # 当某个轴的元素为-1时,将根据数组元素的个数自动计算此轴的长度,因此下面的程序将数组c的shape改为了(2,6) 64 | c.shape = 2, -1 65 | print c 66 | split_line() 67 | 68 | # 使用数组的reshape方法,可以创建一个改变了尺寸的新数组,原数组的shape保持不变 69 | 
# 注意此时数组a和d其实共享数据存储内存区域 70 | d = a.reshape((2, 2)) 71 | print 'a:', a 72 | print 'd:', d 73 | split_line() 74 | 75 | 76 | def np_list_create(): 77 | # 使用xrange创建一维数组 [start,end,步长)包含起始位置,不包含终止位置, 78 | # 元素个数: (end-start)/步长 79 | np_lst = np.arange(0, 10, 1) 80 | print np_lst 81 | print '大小:%d' % np_lst.shape 82 | split_line() 83 | 84 | # 等差数列 85 | # linspace(strat,end,size), [start,end]包含起始位置和终止位置,一共创建size个元素 86 | # 可以通过endpoint关键字指定是否包括终值 87 | print np.linspace(0, 1, 12) 88 | split_line() 89 | 90 | # 等比数列 91 | # logspace(开始指数,结束指数,数量,底数默认10) 92 | print np.logspace(0, 2, 20) 93 | split_line() 94 | 95 | 96 | def np_list_by_byte(): 97 | """ 98 | 使用frombuffer, fromstring, fromfile等函数可以从字节序列创建数组 99 | 使用时一定要传入dtype参数 100 | 101 | Python的字符串实际上是字节序列,每个字符占一个字节, 102 | 因此如果从字符串s创建一个8bit的整数数组的话,所得到的数组正好就是字符串中每个字符的ASCII编码 103 | :return: 104 | """ 105 | s = 'abcdefg' 106 | print np.frombuffer(s, dtype=np.int8) 107 | split_line() 108 | 109 | print np.fromstring(s, dtype=np.int8) 110 | split_line() 111 | 112 | # 如果从字符串s创建16bit的整数数组,那么两个相邻的字节就表示一个整数, 113 | # 把字节98和字节97当作一个16位的整数, 它的值就是98*256+97 = 25185。 114 | # 可以看出内存中是以little endian(低位字节在前)方式保存数据的。 115 | # 所以字符串的长度必须是偶数 116 | print np.fromstring('abcdefgh', dtype=np.int16) 117 | split_line() 118 | 119 | 120 | def np_list_by_func(): 121 | """ 122 | 通过函数创建数组 123 | :return: 124 | """ 125 | # fromfunction 传入一个函数,和表示一个维度大小的可迭代对象(元组,列表) 126 | # 即(10,)表示一维数组,一维元素10个,此时函数接收一个参数 127 | # (5,6)表示二维数组,一维元素5个,二维元素6个,此时函数接收2个参数 128 | print np.fromfunction(lambda x: x + 1, (10,)) 129 | print np.fromfunction(lambda x, y: (x + 1) * (y + 1), (5, 6)) 130 | split_line() 131 | 132 | 133 | def np_list_opt(): 134 | """ 135 | numpy 列表基本操作和python list基本一致 136 | :return: 137 | """ 138 | l = np.arange(10, 1, -1) 139 | print l 140 | print '做小值:', l.min() 141 | print '最大值:', l.max() 142 | print '下标0的元素:', l[0] 143 | split_line() 144 | 145 | # 高级用法,不会共享内存空间,以上操作会共享内存空间 146 | print l[np.array([1, 5, 3])] # 使用数组获取下标元素 147 | print l[[1, 5, 3]] # 使用列表获取下标元素  148 
| split_line() 149 | 150 | # 列表直接过滤 151 | print l[l > 3] # 直接获取列表大于3的值 152 | print l > 3 # 判断列表元素是否大于3返回一个boolean 列表 153 | split_line() 154 | 155 | 156 | if __name__ == '__main__': 157 | # np_version() 158 | # np_list() 159 | # np_list_create() 160 | # np_list_by_byte() 161 | # np_list_by_func() 162 | # np_list_opt() 163 | print np.fromfunction(lambda x: x, (10,)) 164 | -------------------------------------------------------------------------------- /data_analysis/study_numpy/numpy_polynomial_poly1d.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Time : 2017/7/30 13:44 4 | # @Author : Tom.lee 5 | # @File : numpy_polynomial_poly1d.py 6 | # @Software: PyCharm 7 | 8 | """ 9 | 多项式 10 | """ 11 | import numpy as np 12 | 13 | # 构造多项式 14 | p1 = np.poly1d([1]) # 1 15 | p2 = np.poly1d([1, 2]) # x + 2 16 | p3 = np.poly1d([1, 2, 3]) # x^2 + 2x + 3 17 | p4 = np.poly1d([1, 2, 3, 4]) # x^3 + 2* x^2 + 3x + 4 18 | print '\np1:', p1 19 | print '\np2:', p2 20 | print '\np3:', p3 21 | print '\np4:', p4 22 | # 评估x = 0.5处的多项式: 23 | print '\n\n求函数0.5处的值:' 24 | print p1(0.5), 1 25 | print p2(0.5), 0.5 + 2 26 | print p3(0.5), 0.5 ** 2 + 2 * 0.5 + 3 27 | print p4(0.5), 0.5 ** 3 + 2 * 0.5 ** 2 + 3 * 0.5 + 4 28 | # 解 29 | print '\n\n解:' 30 | print p1.r 31 | print p2.r 32 | print p3.r 33 | print p4.r 34 | 35 | 36 | print "*" * 20, 'Y = X + 1', "*" * 20 37 | x = np.linspace(0, 1, 10) # 构造x 38 | y = np.array(map(lambda x: x + 1, x)) # 计算y 39 | m = np.polyfit(x, y, 2) # 拟合多项式参数 40 | y1 = np.poly1d(m) # 构造多项式 41 | print "\nx取值:", x 42 | print "\ny取值:", y 43 | print "\n多项式参数:", m 44 | print "\n一维多项式:", y1 45 | print "\n使用一维多项式计算y值:", y1(x) 46 | print "\n计算一维多项式的解:", y1.r 47 | print "\n比较多项式与函数计算的结果:", "多项式:", y1(120), " 函数:", 120 + 1 48 | -------------------------------------------------------------------------------- /data_analysis/study_numpy/numpy_ufunc.py: 
def foo(x):
    """
    Evaluate a periodic triangular pulse (period 1) at ``x``.

    Within one period the curve has three segments:

    * ``[0, 0.4]``   rises linearly from 0 to 1,
    * ``(0.4, 0.6)`` falls linearly from 1 back to 0,
    * ``[0.6, 1)``   stays at 0.

    :param x: a real number; any value (including negatives) is folded
              into ``[0, 1)`` before evaluation
    :return: the pulse height at ``x`` as a float in ``[0, 1]``
    """
    # c0: position of the peak, hc: height of the peak, c: end of the pulse
    c0, hc, c = 0.4, 1.0, 0.6

    # Always reduce modulo the period. Python's % returns a value in [0, 1)
    # for negative operands too, so the function is periodic on the whole
    # real line instead of only for x > 1.
    x = float(x) % 1.0

    if x <= c0:
        # rising edge: line through the origin with slope hc / c0
        k = hc / c0
        return k * x
    if x < c:
        # falling edge: line through (c, 0) with negative slope
        k = (hc - 0) / (c0 - c)
        return k * (x - c)
    # flat tail of the period
    return 0.0
-------------------------------------------------------------------------------- /data_analysis/study_tesseract/test01.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Time : 17-8-7 下午1:21 4 | # @Author : Tom.Lee 5 | # @CopyRight : 2016-2017 OpenBridge by yihecloud 6 | # @File : test01.py 7 | # @Product : PyCharm 8 | 9 | """ 10 | # BASH 11 | 12 | $ tesseract image_path out 13 | $ cat out.txt 14 | """ 15 | import Image 16 | 17 | import pytesseract 18 | 19 | file_obj = Image.open('./image/20170807142300.png') 20 | print pytesseract.image_to_string(file_obj) 21 | -------------------------------------------------------------------------------- /data_structure.md: -------------------------------------------------------------------------------- 1 | # 数据结构 2 | 查看python对象在内存的地址可以使用内置的`id()`方法来查看.例如`id(3)` 3 | 4 | ## 基本数据类型 5 | 6 | ## 引用数据类型 7 | 8 | ## 可变对象 9 | 10 | ## 不可变对象 11 | -------------------------------------------------------------------------------- /decorator.md: -------------------------------------------------------------------------------- 1 | # decorator装饰器 2 | 3 | 装饰器本质上是一个Python函数,它可以让其他函数在不需要做任何代码变动的前提下增加额外功能, 4 | 装饰器的返回值也是一个函数对象。 5 | 它经常用于有切面需求的场景,比如:插入日志、性能测试、事务处理、缓存、权限校验等场景。 6 | 概括的讲,装饰器的作用就是为已经存在的对象添加额外的功能。 7 | 8 | #函数装饰器 9 | ###简单的装饰器 10 | ```python 11 | def decorator(func): 12 | """ 13 | func 即为调用该函数的方法 14 | """ 15 | def wrapper(*args, **kwargs): 16 | print '方法%s调用装饰器' % func.__name__ 17 | return func(*args, **kwargs) 18 | 19 | return wrapper 20 | 21 | 22 | @decorator 23 | def show(): 24 | """ 25 | @符号是装饰器的语法糖,在定义函数的时候使用,避免再一次赋值操作 26 | 使用@语法糖,等价于 show=decorator(show) 27 | """ 28 | print '......show......' 
29 | 30 | 31 | 32 | show() 33 | 34 | ``` 35 | ###带参数的装饰器 36 | ```python 37 | def logging(level): 38 | """ 39 | 对简单装饰器的一次封装,返回一个新的装饰器,传递了level参数 40 | """ 41 | 42 | def decorator(func): 43 | def wrapper(*args, **kwargs): 44 | if level == "warn": 45 | print ("%s is running" % func.__name__) 46 | return func(*args, **kwargs) 47 | 48 | return wrapper 49 | 50 | return decorator 51 | 52 | 53 | @logging(level="warn") 54 | def hell(name='foo'): 55 | print("i am %s" % name) 56 | 57 | 58 | hell() 59 | ``` 60 | #类装饰器 61 | 相比函数装饰器,类装饰器具有灵活度大、高内聚、封装性等优点。 62 | 使用类装饰器还可以依靠类内部的`__call__`方法, 63 | 当使用`@`形式将装饰器附加到函数上时,就会调用此方法。 64 | 65 | ```python 66 | class Logging(object): 67 | def __init__(self, func): 68 | self._func = func 69 | 70 | def __call__(self): 71 | print ('Logging starting') 72 | self._func() 73 | print ('Logging ending') 74 | 75 | 76 | @Logging 77 | def tes(): 78 | print ('bar') 79 | 80 | 81 | tes() 82 | ``` 83 | 84 | #装饰器缺点 85 | ###举例 86 | 使用装饰器极大地复用了代码,但是他有一个缺点就是"原函数的元信息"不见了 87 | ```python 88 | def decorator(func): 89 | def wrapper(*args, **kwargs): 90 | print func.__name__, func.__doc__, 'call decorator' 91 | return func(*args, **kwargs) 92 | 93 | return wrapper 94 | 95 | 96 | @decorator 97 | def show(): 98 | """ show test """ 99 | print '......' 100 | 101 | 102 | show() 103 | print show.__name__ # wrapper 104 | print show.__doc__ # None 105 | ``` 106 | ###改进装饰器 107 | 使用`functools.wraps`装饰器,它能把原函数的元信息拷贝到装饰器函数中, 108 | 这使得装饰器函数也有和原函数一样的元信息了。 109 | ```python 110 | from functools import wraps 111 | 112 | def decorator(func): 113 | @wraps(func) 114 | def wrapper(*args, **kwargs): 115 | print func.__name__, func.__doc__, 'call decorator' 116 | return func(*args, **kwargs) 117 | 118 | return wrapper 119 | 120 | 121 | @decorator 122 | def show(): 123 | """ show test """ 124 | print '......' 
125 | 126 | 127 | show() 128 | print show.__name__ # show 129 | print show.__doc__ # show test 130 | 131 | ``` 132 | 133 | 134 | # 内置装饰器 135 | @staticmathod、@classmethod、@property 136 | 137 | 138 | # 装饰器的顺序 139 | ```python 140 | @a 141 | @b 142 | @c 143 | def f (): 144 | pass 145 | # 等效于 146 | f = a(b(c(f))) 147 | ``` 148 | 149 | 150 | -------------------------------------------------------------------------------- /dict.md: -------------------------------------------------------------------------------- 1 | # python 字典dict 2 | 无序,k-v对存在,查找速度快,占用内存高,key是唯一值,不能重复。 3 | 在python终端,使用`dir(dict)`获取方法及属性列表,使用`help(dict)`获取其使用方法 4 | 5 | # 基本操作 6 | ### 声明 7 | * `声明`:`dict={}` 8 | * `声明并赋值`:`dict={'key':'value','num':1,'list':[1,2],'tup':(1,2,3)}` 9 | 10 | ### 添加或修改 11 | * `dict['key'] = 'value'`:使用dict['key']=value的方式为字典重新赋值,或添加元素 12 | 13 | ### 删除 14 | * `d.pop('k')` : 删除字典指定的k,并且返回该k的值 15 | * `del d['k']` : 删除字典的指定k 16 | * `del dict` : 删除整个字典 17 | 18 | ### 取值 19 | * `dict['k']` : 当dict不存在k时,抛出异常 20 | * `dict.get('k')` : 当dict不存在k时,返回None 21 | 22 | ### 遍历 23 | * 获取keys列表:`dict.keys()` 24 | * 获取值列表 :`dict.values()` 25 | * 获取(k,v)元组列表:`dict.items()` 26 | * 获取以上列表的可迭代对象,需要使用`dict.iterkeys(); dict.itervalues(); dict.iteritems()` 27 | ```python 28 | for k,v in dict.items(): 29 | print 'key:',k," -value:",v 30 | ``` 31 | 32 | # 转换 33 | * str 转 dict : 34 | * `eval()`函数: 使用`eval()`函数可以使字符串转为字典 `eval(str)` 35 | * `exec()`函数: 需要声明一个被赋值的变量 36 | ```python 37 | >>> s="{'k':1,'w':2}" 38 | >>> d=None 39 | >>> exec('d='+s) 40 | >>> d 41 | {'k': 1, 'w': 2} 42 | >>> d['k'] 43 | 1 44 | >>> 45 | ``` 46 | * json模块:转换带特殊字符的字典 47 | ```shell 48 | >>> s='[{"RepoDigests": null,"Created":1466711701,"Size":5042677,"VirtualSize":5042677,"Labels":null}]' 49 | >>> import json 50 | >>> print json.loads(s) 51 | [{u'Labels': None, u'Size': 5042677, u'RepoDigests': None, u'VirtualSize': 5042677, u'Created': 1466711701}] 52 | >>> 53 | ``` 54 | * dict 转 str : `str(dict)` 55 | 56 | # 技巧 57 | ### 按顺序获取dict的元素 58 
| ```python 59 | keys=dict.keys() 60 | for k in sorted(keys): 61 | print dict.get(k) 62 | 63 | ``` 64 | 65 | ### 字典相加 66 | ```python 67 | a={1:1} 68 | b={2:2} 69 | 70 | c= dict(a,**b) # 返回值为大字典 71 | a.update(b) # 返回值为None,a为大字典 72 | ``` 73 | 74 | ### 从大字典取出小字典 75 | ```python 76 | dic = {'a': 1, 'b': 2, 'c': 3} 77 | lis = ['a', 'b'] 78 | 79 | print dict(zip(lis, map(lambda k: dic.get(k), lis))) 80 | ``` 81 | 82 | ### 两个元组或列表转字典 83 | ```python 84 | k = ['a', 'b', 'c'] 85 | v = [1, 2, 3] 86 | 87 | print zip(k, v) 88 | print dict(zip(k, v)) 89 | 90 | ``` 91 | 92 | ### 对象列表构造大字典 93 | ```python 94 | class E: 95 | def __init__(self, k, v): 96 | self.k = k 97 | self.v = v 98 | 99 | e1 = E('a', 1) 100 | e2 = E('b', 2) 101 | e3 = E('c', 3) 102 | l = [e1, e2, e3] 103 | 104 | print reduce(lambda o1, o2: dict(o1, **o2), 105 | map(lambda e: {e.k: e.v}, l)) 106 | ``` 107 | -------------------------------------------------------------------------------- /file.md: -------------------------------------------------------------------------------- 1 | # 文件处理 2 | 3 | #读取文件 4 | 5 | ###read() 6 | 一次性读取,读取大文件时容易内存溢出 7 | ```python 8 | def read_file(): 9 | with open(path, "r") as f: 10 | print f.read() # 一次性读取,容易内存溢出 11 | 12 | ``` 13 | ###readlines() 14 | 一次性读到列表,读取大文件时容易内存溢出 15 | ```python 16 | def read_line_file(): 17 | with open(path, "r") as f: 18 | for line in f.readlines(): # 一次性读取,容易内存溢出 19 | print line 20 | ``` 21 | 22 | 23 | ###读取大文件 24 | 1.使用pythonic方式读取大文件(推荐方法): 25 | ```python 26 | def read_big_file(path): 27 | """ 28 | 使用pythonic方式读取大文件 29 | :param path: 30 | :return: 31 | """ 32 | with open(path) as f: # 文件对象f当作迭代对象, 系统将自动处理IO缓冲和内存管理 33 | for line in f: 34 | print line 35 | 36 | ``` 37 | 2.按指定大小读取大文件(建议是二进制文件,如果是字符串处理会,出现截取不准确): 38 | ```python 39 | 40 | def get_big_file(path, size): 41 | with open(path, "r") as f: 42 | while True: 43 | block = f.read(size) # 每次读取固定长度到内存缓冲区 44 | if block: 45 | yield block 46 | else: 47 | return # 如果读取到文件末尾,则退出 48 | ``` 49 | 50 | #写入文件 51 | 52 |
###文件复制 53 | 使用 pythonic 方式复制文件(推荐方法): 54 | ```python 55 | def copy_file(src, target): 56 | start = time.time() 57 | dest = open(target, 'wb+') 58 | with open(src) as f: # 文件对象f当作迭代对象, 系统将自动处理IO缓冲和内存管理 59 | for line in f: 60 | dest.write(line) 61 | dest.close() 62 | print '时长:', time.time() - start, '秒' 63 | ``` 64 | 按指定大小复制文件(以二进制方式打开源文件): 65 | ```python 66 | def copy_file_block(src, target, size): 67 | start = time.time() 68 | dest = open(target, 'wb+') 69 | with open(src, "r") as f: 70 | while True: 71 | block = f.read(size) # 每次读取固定长度到内存缓冲区 72 | if block: 73 | dest.write(block) 74 | else: 75 | break # 如果读取到文件末尾,则退出 76 | dest.close() 77 | print '时长:', time.time() - start, '秒' 78 | 79 | ``` 80 | 81 | #练习: 82 | 计算大文件中单词排序个数 83 | ```python 84 | 85 | import time 86 | 87 | start = time.time() 88 | dic = {} 89 | with open(path) as f: 90 | for line in f: 91 | lit = line.split(";") 92 | for i in lit: 93 | if dic.get(i): 94 | dic[i] = dic.get(i) + 1 95 | else: 96 | dic[i] = 1 97 | 98 | print '时长:', time.time() - start, '秒' 99 | sort = sorted(dic.items(), key=lambda item: item[1], reverse=True) 100 | for v in sort: 101 | print v 102 | ``` 103 | 104 | 105 | 106 | -------------------------------------------------------------------------------- /levenshtein.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Time : 17-7-4 上午11:21 4 | # @Author : Tom.Lee 5 | # @Description : http://blog.csdn.net/gzlaiyonghao/article/details/1483728 6 | # @File : test.py 7 | # @Product : PyCharm 8 | 9 | import re 10 | 11 | """ 12 | Levenshtein Distance 编辑距离算法,计算字符串相似度 13 | 14 | 比如要计算cafe和coffee的编辑距离。cafe→caffe→coffe→coffee 为3 15 | 先创建一个6×8的表(cafe长度为4,coffee长度为6,各加2)*代表空白占位符 16 | 17 | * * c o f f e e 18 | 19 | * 0 1 2 3 4 5 6 20 | 21 | c 1 0 1 2 3 4 5 22 | 23 | a 2 1 1 2 3 4 5 24 | 25 | f 3 2 2 1 2 3 4 26 | 27 | e 4 3 3 2 2 2 3 28 | 29 | 30 | 从3,3格开始,开始计算。取以下三个值的最小值: 31 | 
def minimum(a, b):
    """
    Return the Levenshtein (edit) distance between two sequences.

    The distance is the minimum number of single-element insertions,
    deletions and substitutions needed to turn ``a`` into ``b``.

    Only two rows of the dynamic-programming table are kept at any time:
    ``previous`` holds the distances for the prefix of ``b`` processed so
    far, ``current`` is the row being filled for one more element of ``b``.

    :param a: first sequence (typically a string)
    :param b: second sequence (typically a string)
    :return: the edit distance as a non-negative int
    """
    # Row 0: turning the empty prefix of b into a[:j] costs j insertions.
    previous = list(range(len(a) + 1))

    for i, item_b in enumerate(b, 1):
        # Column 0: turning b[:i] into the empty prefix of a costs i deletions.
        current = [i]
        for j, item_a in enumerate(a, 1):
            cost = 0 if item_a == item_b else 1
            current.append(min(
                previous[j] + 1,         # cell above: delete / insert
                current[j - 1] + 1,      # cell to the left: insert / delete
                previous[j - 1] + cost,  # diagonal: keep or substitute
            ))
        previous = current

    return previous[-1]
69 | 70 | 71 | def pop_cidr_uuid(s): 72 | s = re.compile(r'(?> None 38 | print list # [1,2,3,'a'] 39 | 40 | ``` 41 | 42 | ### sorted()临时排序 43 | `sorted()`方法的排序规则与sort()方法相同 44 | ```python 45 | list=[1,3,2] 46 | 47 | print sorted(list) # [1,2,3] 48 | print list # [1,3,2] 49 | 50 | ``` 51 | 52 | ### reverse()列表反转 53 | 使用`reverse()`方法使列表元素顺序发生反转,注意该方法对列表的改变是永久的 54 | ```python 55 | list=[1,2,3] 56 | 57 | print list # [1,2,3] 58 | 59 | list.reverse() # None, 该方法没有返回值 60 | print list # [3,2,1] ,使用`reverse()`方法永久的改变了列表的元素位置 61 | 62 | 63 | ``` 64 | --- 65 | # 确定列表长度 66 | 使用`len()`函数获取列表长度,如:`list=[1,2,3] ; print len(list) >> 3` 67 | 68 | --- 69 | # 遍历 70 | * 带下标遍历: 71 | ```python 72 | x = [11, 12, 13, 14, 15] 73 | for index, value in enumerate(x): 74 | print index, ':', value 75 | ``` 76 | 77 | * 简单遍历: 78 | ```python 79 | for val in list: 80 | print val 81 | ``` 82 | 83 | * 精简操作,使用列表解析式: `[表达式 for v in list]` 返回一个新的列表 84 | ```python 85 | num_list = [1, 2, 3] # [1,2,3] 86 | 87 | # 简单的列表解析式 88 | num_list = [v+1 for v in num_list] # [2,3,4] 89 | 90 | # 嵌套的列表解析式 91 | ww, ll = ['1', '22', '333', '4444'], [] 92 | for w in ww: 93 | for l in w: 94 | ll.append(l) 95 | 96 | print ll 97 | print [l for w in ww for l in w] 98 | 99 | """结构变形 100 | print[l 101 | for w in ww 102 |     for l in w] 103 | 104 | """ 105 | ``` 106 | 107 | --- 108 | # 切片 109 | 注意参数start,end为列表下标: 110 | * `list[start:stop]` # [start,stop) 111 | * `list[:stop]` # [0,end) 112 | * `list[:-1]` #[0,len-1) 即返回列表的前len-1个元素 113 | * `list[start:]` # [start,len) 114 | * `list[-1:]` # [len-1,len) 115 | * `list[:]` # [0,len) ,copy 116 | * `list[::2]` # 先取2个元素组成元组,(0,1),(2,3),(4,5),然后取下标为1的值 ==>[0,2,4] 117 | * `list[::-1]` # 先取1个元素组成元组,(5,),(4,)(3,),(2,),(1,),(1,) >> [5,4,3,2,1,0] 列表反转的技巧 118 | 119 | ### 拓展切片 120 | **注:list[i:j:stride]:**表示拓展切片,i表示起始索引,j表示终止索引,stride表示步长,stride默认为1,只能为非0整数 121 | * stride正数:切片从左往右切,切出[i:j),然后按照stride的值,进行获取,表示相隔(stride-1)个元素取出一个元素,组成新列表 122 | * 
stride负数:切片从右往左切,因为从右往左切,所以此时,j为起始索引,i为终止索引,切出[j:i],所以"i的值要大于j", 123 | 注意此时起始下标默认为None,假如起始下标j=0,表示从第二个元素开始,包含终止索引然后按照stride的值,进行获取 124 | 125 | **举例**: 126 | 127 | 128 | 129 | # 列表的引用 130 | 如果你将列表list赋值给变量a,而将a赋值给b,则a,b两个变量指向的位置时一样的,所以你改变b时,a也会发生相同的变化 131 | ```python 132 | a = [1,2,3] 133 | b = a 134 | 135 | print a # [1,2,3] 136 | print b # [1,2,3] 137 | 138 | b.append(4) 139 | print a # [1,2,3,4] 140 | ``` 141 | 如何避免不更改原来的列表? 142 | * 列表复制:`new_list = old_list[:]`,但这种方法只适用于简单列表,也就是列表中的元素都是基本类型, 143 | 如果列表元素还存在列表的话,这种方法就不适用了。原因就是,象a[:]这种处理,只是将列表元素的值生成一个新的列表,如果列表元素也是一个列表,如:a=[1,[2]],那么这种复制对于元素[2]的处理只是复制[2]的引用,而并未生成 [2]的一个新的列表复制 144 | ``` 145 | >>> a=[1,[2]] 146 | >>> b=a[:] 147 | >>> b 148 | [1, [2]] 149 | >>> a[1].append(3) 150 | >>> a 151 | [1, [2, 3]] 152 | >>> b 153 | [1, [2, 3]] 154 | ``` 155 | * 使用copy模块中的deepcopy函数: 156 | ``` 157 | >>> import copy 158 | >>> a=[1,[2]] 159 | >>> b=copy.deepcopy(a) 160 | >>> b 161 | [1, [2]] 162 | >>> a[1].append(3) 163 | >>> a 164 | [1, [2, 3]] 165 | >>> b 166 | [1, [2]] 167 | ``` 168 | 169 | # 转换 170 | “字符串”原理上其实是一个个字符的列表,所以 `str='ab0'; list=list(str) >> ['a','b',0]`, 171 | 所以对字符串进行截取时,可以先转换为列表处理,然后利用`join()`函数再转换为字符串 172 | * str 转 list : `list(str)` 173 | * list 转 str : `''.join(list)` 174 | 175 | “字典”是成对存在的,所以要转换时,保证列表的对称 176 | * list 转 dict : 177 | ``` 178 | >>> list_k=['a','b','c','d'] 179 | >>> list_v=[1,2,3,4] 180 | >>> dict(zip(list_k,list_v)) 181 | {'a': 1, 'c': 3, 'b': 2, 'd': 4} 182 | >>> 183 | ``` 184 | 如果列表中元素为 元素个数为2的列表,可用直接使用dict(list)来转换,如`dict([['a',1],['b',2]])` 185 | 186 | * dict 转 list : 187 | ``` 188 | >>> keys=dict.keys() 189 | >>> keys 190 | ['a', 'c', 'b', 'd'] 191 | >>> values=dict.values() 192 | >>> values 193 | [1, 3, 2, 4] 194 | >>> 195 | ``` 196 | 元组()与set()可用直接使用同名函数,互相转换,如:`list((1,2)), tuple([1,2,3])` 197 | 198 | 199 | # 常用函数 200 | ### range()函数 201 | `range()`函数可以返回一个数值列表:`range(5)`返回`[0, 1, 2, 3, 4]`注意第一个元素是从0开始, 202 | 在python 终端,你可以使用`help(range)`查看`range()`函数的使用方法: 203 | ``` 204 | range(...) 
205 | 返回包含整数的列表。 206 | 207 | range(stop) -> 返回一个数值列表,stop指定终止值,但不包含 208 | range(4) >> [0, 1, 2, 3]. 209 | 210 | range(start, stop[, step]) -> 返回一个数值列表,可用start指定起始值,setp指定步长,当给定步长时,它指定增量(或递减) 211 | range(i, j) >> [i, i+1, i+2, ..., j-1]; start (!) 默认从0开始. 212 | range(1,10,2) >> [1, 3, 5, 7, 9] 213 | 214 | ``` 215 | ### min()函数 216 | `min()`返回一个**可迭代**对象中最小的值或**传入多个参数**的最小参数,`help(min)`查看帮助文档 217 | ``` 218 | min(...) 219 | min(iterable[, key=func]) -> value 220 | min(a, b, c, ...[, key=func]) -> value 221 | 222 | With a single iterable argument, return its smallest item. 223 | With two or more arguments, return the smallest argument. 224 | (END) 225 | 226 | ``` 227 | 举例: 228 | ``` 229 | >>> min([1,2,34]) 230 | 1 231 | >>> min([1,2,34],0) 232 | 0 233 | >>> 234 | >>> min(['a',2,34]) 235 | 2 236 | 237 | ``` 238 | 239 | # 技巧 240 | ### list列表想加 241 | ``` 242 | list = list1+list2 243 | ``` 244 | 245 | 246 | 247 | 248 | -------------------------------------------------------------------------------- /loop.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tomoncle/Python-notes/ce675486290c3d1c7c2e4890b57e3d0c8a1228cc/loop.md -------------------------------------------------------------------------------- /page_parser/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Time : 17-8-8 下午2:19 4 | # @Author : Tom.Lee 5 | # @CopyRight : 2016-2017 OpenBridge by yihecloud 6 | # @File : __init__.py.py 7 | # @Product : PyCharm 8 | -------------------------------------------------------------------------------- /page_parser/beautifulsoup/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Time : 17-4-20 下午1:53 4 | # @Author : Tom.Lee 5 | # @Description : 6 | # @File : __init__.py.py 7 | # @Product : PyCharm 8 | 
class HtmlParser(object):
    """
    Thin wrapper around BeautifulSoup for querying tags by name and attributes.

    Subclass it to implement more elaborate parsing. The three class
    attributes below act as overridable defaults.
    """
    # attribute that holds a link; resolved against base_url in __format_url
    # (the misspelled name is kept because subclasses/callers may override it)
    url_filed = 'href'
    # parser backend name handed to bs4.BeautifulSoup
    parser = 'html.parser'
    # value passed as from_encoding to bs4.BeautifulSoup
    encoding = 'utf-8'

    def __init__(self, base_url=None):
        """
        :param base_url: optional base URL used to resolve link attributes
        """
        self.__base_url = base_url

    def simple_tags(self, data, tag=None, patterns=None, attributes=None):
        """
        Query a single tag level.

        :param data: markup (string, bs4 result set or bs4 element)
        :param tag: tag name to search for
        :param patterns: attribute filter passed to ``find_all``
        :param attributes: optional collection of keys to keep in each result
        :return: list of dicts, one per matching tag
        """
        tags = self.__parser_tags(data, tag, patterns)
        return self.__tags(tags, attributes)

    def multilevel_tags(self, data, multilevel_patterns=None, attributes=None):
        """
        Query nested tag levels, narrowing the result one level at a time
        (comparable to the CSS selector ``div .a``).

        :param data: markup to search
        :param multilevel_patterns: sequence of single-entry dicts
            ``{tag_name: attribute_filter}``, outermost level first
        :param attributes: optional collection of keys to keep in each result
        :return: ``data`` unchanged when no patterns are given, otherwise a
            list of dicts describing the tags matched by the last level
        """
        if not multilevel_patterns:
            return data

        # Walk the levels in order without touching the caller's sequence
        # (the previous implementation emptied it via remove()).
        for tag_patterns in multilevel_patterns:
            tag, patterns = next(iter(tag_patterns.items()))
            data = self.__parser_tags(data, tag, patterns)

        return self.__tags(data, attributes)

    def element(self, data, tag=None, patterns=None):
        """
        Return the first element matching the query, or None when nothing matches.
        """
        elements = self.elements(data, tag, patterns)
        return elements[0] if elements else None

    def elements(self, data, tag=None, patterns=None):
        """
        Return all elements matching the query.
        """
        return self.__parser_tags(data, tag, patterns)

    def __tags(self, data, attributes=None):
        """
        Convert tag objects into plain dicts of their attributes plus a
        ``text`` entry holding the tag's text.

        :param data: iterable of objects exposing ``attrs`` and ``getText()``
        :param attributes: optional collection of keys to keep; when empty
            every key is kept
        :return: list of dicts
        """
        tags = [dict(tag_.attrs, text=tag_.getText()) for tag_ in data]

        if not attributes:
            # NOTE(review): link attributes are not resolved against base_url
            # on this path; kept as in the original - confirm that is intended.
            return tags

        # Build filtered copies instead of popping keys while iterating the
        # dict, which fails as soon as items() is a live view.
        tags = [
            dict((k, v) for k, v in tag_attr.items() if k in attributes)
            for tag_attr in tags
        ]

        if self.__base_url:
            return self.__format_url(tags)

        return tags

    def __parser_tags(self, data, tag=None, patterns=None):
        """
        Return the result set of ``find_all(tag, patterns)`` on the parsed data.
        """
        return self.__data_parser(data).find_all(tag, patterns)

    def __data_parser(self, data):
        """
        Normalise ``data`` to markup text and parse it with BeautifulSoup.
        """
        # `unicode` only exists on Python 2, which this module targets
        # (it imports `urlparse`).
        if isinstance(data, (str, unicode)):
            bs4_str = data
        elif isinstance(data, bs4.element.ResultSet):
            # a previous query result: re-serialise every tag into one document
            bs4_str = ' '.join([str(_tag) for _tag in data])
        else:
            bs4_str = str(data)
        return bs4.BeautifulSoup(bs4_str, self.parser, from_encoding=self.encoding)

    def __format_url(self, maps):
        """
        Resolve the link attribute of every dict in ``maps`` against the base URL.

        The dicts are updated in place; entries without a link are left alone.
        """
        for m in maps:
            if not m.get(self.url_filed):
                continue
            m[self.url_filed] = urlparse.urljoin(
                self.__base_url, m.get(self.url_filed)
            )
        return maps

The Dormouse's story

117 | 118 |

Once upon a time there were three little sisters; and their names were 119 | Elsie, 120 | Lacie and 121 | 标题; 122 | and they lived at the bottom of a well.

123 | 124 |

...

125 | """ 126 | for s in HtmlParser().multilevel_tags(doc, [{'a': {'class': 'sister'}}]): 127 | print s['text'] 128 | -------------------------------------------------------------------------------- /page_parser/beautifulsoup/test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Time : 17-4-20 下午1:53 4 | # @Author : Tom.Lee 5 | # @Description : 6 | # @File : test.py 7 | # @Product : PyCharm 8 | 9 | import re 10 | import urlparse 11 | 12 | import bs4 13 | 14 | html_doc = """ 15 | The Dormouse's story 16 | 17 |

The Dormouse's story

18 | 19 |

Once upon a time there were three little sisters; and their names were 20 | Elsie, 21 | Lacie and 22 | 标题; 23 | and they lived at the bottom of a well.

24 | 25 |

...

26 | """ 27 | # print html_doc 28 | 29 | 30 | soup = bs4.BeautifulSoup(html_doc, "html.parser", from_encoding='utf-8') 31 | pattern = {'class': 'sister', 'id': re.compile(r'^link\d+$')} 32 | data = soup.find_all('a', pattern) 33 | print data 34 | 35 | # 对象属性 36 | print data[0] 37 | print data[2].getText() 38 | print data[0].contents 39 | print data[0].attrs 40 | 41 | 42 | # 43 | # tags_a = soup.find_all(name='a', attrs={'class': 'sister'}) 44 | # for a in tags_a: 45 | # print type(a), a.get('id'), a.get('href'), a.get('no_found') 46 | # print dict(a.attrs, tag_name=a.getText()) 47 | print isinstance(data,bs4.element.ResultSet) 48 | bs4_str = ' '.join([str(_tag) for _tag in data]) 49 | sp = bs4.BeautifulSoup(bs4_str, "html.parser", from_encoding='utf-8') 50 | data = sp.find_all('a', pattern) 51 | print data 52 | print data[2].getText() 53 | 54 | 55 | # 字符串拼接 56 | # print urlparse.urljoin('https://www.baidu.com', '//www.ji.com') -------------------------------------------------------------------------------- /page_parser/beautifulsoup/test_403.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Time : 17-7-25 下午3:17 4 | # @Author : Tom.Lee 5 | # @CopyRight : 2016-2017 OpenBridge by yihecloud 6 | # @File : test_403.py 7 | # @Product : PyCharm 8 | 9 | import bs4 10 | 11 | t403 = """ 12 | 13 | 14 | 403 Forbidden 15 | 16 | 17 |

403 Forbidden

18 | 资源 bc6d81de-97af-4ebd-b01a-b23a6567bea2 is protected and cannot be deleted.

19 | 20 | 21 | 22 | 23 | 24 | """ 25 | soup = bs4.BeautifulSoup(t403, "html.parser", from_encoding='utf-8') 26 | 27 | title = soup.find('title') 28 | body = soup.find('body') 29 | title_text = title.getText() 30 | body_text = body.getText().replace(title_text, '').replace('\n', '') 31 | 32 | print {title_text.split(' ')[-1]: {'message': body_text, 'code': 1}} 33 | -------------------------------------------------------------------------------- /page_parser/xpath/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Time : 17-5-6 下午1:10 4 | # @Author : tom.lee 5 | # @Site : 6 | # @File : __init__.py.py 7 | # @Software: PyCharm 8 | -------------------------------------------------------------------------------- /page_parser/xpath/test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Time : 17-5-6 下午1:10 4 | # @Author : tom.lee 5 | # @Site : 6 | # @File : test.py 7 | # @Software: PyCharm 8 | 9 | 10 | from lxml import etree 11 | 12 | f = open('file.txt') 13 | content = f.read() 14 | selector = etree.HTML(content) 15 | 16 | divs = selector.xpath('//div[@class="site-item "]/div[@class="title-and-desc"]') 17 | for r in divs: 18 | item_ = None or {} 19 | item_['title'] = r.xpath('a/div/text()')[0] 20 | item_['link'] = r.xpath('a/@href')[0] 21 | item_['desc'] = r.xpath('div/text()')[0].replace('\n', '').strip() 22 | print item_ 23 | -------------------------------------------------------------------------------- /rpc/RPyC/demo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Time : 17-8-13 下午1:31 4 | # @Author : Tom.Lee 5 | # @CopyRight : 2016-2017 6 | # @File : demo.py 7 | # @Product : PyCharm 8 | # @Docs : 9 | # @Source : 10 | 11 | import rpyc 12 | from rpyc.utils.server import 
ThreadedServer 13 | 14 | 15 | class MyService(rpyc.Service): 16 | data = [] 17 | 18 | def exposed_save_data(self, d): 19 | self.data.append(d) 20 | 21 | def exposed_get_data(self): 22 | return self.data 23 | 24 | 25 | class MyClient(object): 26 | @classmethod 27 | def conn(cls): 28 | connections = rpyc.connect('localhost', 15111) 29 | connections.root.save_data(123) 30 | print connections.root.get_data() 31 | 32 | 33 | if __name__ == '__main__': 34 | import threading 35 | import time 36 | 37 | server = ThreadedServer(MyService, port=15111) 38 | client = MyClient() 39 | 40 | 41 | def start(): 42 | print '*************************************' 43 | print '*************************************' 44 | print '*****************RpyC****************' 45 | print '************ **************' 46 | print '*************************************' 47 | print '***************start server**********' 48 | print '*************************************' 49 | print '*************************************\n\n' 50 | server.start() 51 | 52 | threading.Thread(target=start).start() 53 | 54 | client.conn() 55 | time.sleep(5) 56 | 57 | server.close() 58 | print 'service stop.' 
59 | -------------------------------------------------------------------------------- /rpc/RPyC/tutorials/part01.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Time : 17-8-13 下午3:37 4 | # @Author : Tom.Lee 5 | # @CopyRight : 2016-2017 OpenBridge by yihecloud 6 | # @File : part01.py 7 | # @Product : PyCharm 8 | # @Source : 9 | 10 | """ 11 | rpyc 客户端查询服务端信息 12 | 13 | """ 14 | 15 | import os 16 | 17 | import rpyc 18 | 19 | # 打印当前路径 20 | print os.getcwd() 21 | 22 | # 启动内置服务 23 | # os.system('python /usr/local/bin/rpyc_classic.py') 24 | """ 25 | CMD: 26 | tom@aric-ThinkPad-E450:~$ python /usr/local/bin/rpyc_classic.py 27 | INFO:SLAVE/18812:server started on [0.0.0.0]:18812 28 | """ 29 | 30 | # 连接服务器 31 | conn = rpyc.classic.connect('localhost') 32 | 33 | mod1 = conn.modules.sys # 访问服务器上的sys模块 34 | mod2 = conn.modules["xml.dom.minidom"] # 访问服务器上的xml.dom.minidom模块 35 | print mod1, mod2 36 | 37 | # 打印服务器启动路径 38 | print conn.modules.os.getcwd() 39 | -------------------------------------------------------------------------------- /rpc/RPyC/tutorials/services/registry_discovery/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Time : 17-8-15 下午1:27 4 | # @Author : Tom.Lee 5 | # @CopyRight : 2016-2017 OpenBridge by yihecloud 6 | # @File : __init__.py.py 7 | # @Product : PyCharm 8 | # @Docs : 9 | # @Source : 10 | 11 | 12 | -------------------------------------------------------------------------------- /rpc/RPyC/tutorials/services/registry_discovery/client_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Time : 17-8-15 下午2:00 4 | # @Author : Tom.Lee 5 | # @CopyRight : 2016-2017 OpenBridge by yihecloud 6 | # @File : client_test.py 7 | # @Product : PyCharm 8 | # @Docs : 9 | # 
@Source : 10 | 11 | 12 | import rpyc 13 | from rpyc.utils.registry import UDPRegistryClient 14 | 15 | 16 | def service01(): 17 | conn = rpyc.connect(host='localhost', port=18861) 18 | root = conn.root # MyService object 19 | # object 20 | print root 21 | 22 | print root.get_service_name() 23 | print root.get_service_aliases() 24 | 25 | # custom method 26 | print root.get_answer() # 66 27 | print root.exposed_get_answer() # 66 28 | # print root.get_question() # AttributeError: cannot access 'get_question' 29 | 30 | registrar = UDPRegistryClient() 31 | list_of_servers = registrar.discover("foo") 32 | print rpyc.discover(service_name='MY', host='localhost') 33 | 34 | 35 | if __name__ == '__main__': 36 | service01() 37 | -------------------------------------------------------------------------------- /rpc/RPyC/tutorials/services/registry_discovery/service01.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Time : 17-8-15 下午1:35 4 | # @Author : Tom.Lee 5 | # @CopyRight : 2016-2017 OpenBridge by yihecloud 6 | # @File : service.py 7 | # @Product : PyCharm 8 | # @Docs : 9 | # @Source : 10 | 11 | import rpyc 12 | from rpyc.utils.server import ThreadedServer 13 | 14 | 15 | 16 | class MyService(rpyc.Service): 17 | 18 | def on_connect(self): 19 | pass 20 | 21 | def on_disconnect(self): 22 | pass 23 | 24 | @classmethod 25 | def exposed_get_answer(cls): 26 | return 66 27 | 28 | @classmethod 29 | def get_question(cls): 30 | return "what is the airspeed velocity of an unladen swallow?" 31 | 32 | 33 | if __name__ == "__main__": 34 | t = ThreadedServer(MyService, port=18861) 35 | print """ 36 | service start ok! 
port {port} 37 | """.format(port=18861) 38 | t.start() 39 | -------------------------------------------------------------------------------- /scheduler_task/study_apscheduler/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Time : 17-8-8 下午2:28 4 | # @Author : Tom.Lee 5 | # @CopyRight : 2016-2017 OpenBridge by yihecloud 6 | # @File : __init__.py.py 7 | # @Product : PyCharm 8 | -------------------------------------------------------------------------------- /scheduler_task/study_apscheduler/examples/demo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Time : 17-8-13 上午11:33 4 | # @Author : Tom.Lee 5 | # @CopyRight : 2016-2017 OpenBridge by yihecloud 6 | # @File : demo.py 7 | # @Product : PyCharm 8 | # @Docs : 9 | # @Source : 10 | 11 | 12 | import os 13 | 14 | from apscheduler.schedulers.blocking import BlockingScheduler 15 | 16 | if __name__ == '__main__': 17 | scheduler = BlockingScheduler() 18 | scheduler.add_job('sys:stdout.write', 'interval', seconds=3, args=['tick ...\n']) 19 | print('Press Ctrl+{0} to exit'.format('Break' if os.name == 'nt' else 'C')) 20 | 21 | try: 22 | scheduler.start() 23 | except (KeyboardInterrupt, SystemExit): 24 | pass 25 | -------------------------------------------------------------------------------- /scheduler_task/study_apscheduler/examples/executors/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Time : 17-8-13 上午11:07 4 | # @Author : Tom.Lee 5 | # @CopyRight : 2016-2017 OpenBridge by yihecloud 6 | # @File : __init__.py.py 7 | # @Product : PyCharm 8 | -------------------------------------------------------------------------------- /scheduler_task/study_apscheduler/examples/executors/configure.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Time : 17-8-9 上午9:56 4 | # @Author : Tom.Lee 5 | # @CopyRight : 2016-2017 6 | # @File : job_configure.py 7 | # @Product : PyCharm 8 | 9 | 10 | from apscheduler.executors.pool import ThreadPoolExecutor, ProcessPoolExecutor 11 | from apscheduler.jobstores.sqlalchemy import SQLAlchemyJobStore 12 | from apscheduler.schedulers.background import BackgroundScheduler 13 | from pytz import utc 14 | 15 | job_stores = { 16 | 'default': SQLAlchemyJobStore( 17 | url='mysql+mysqldb://root:root@localhost:3306/djangoapp?charset=utf8') 18 | } 19 | 20 | 21 | executors = { 22 | 'default': ThreadPoolExecutor(20), 23 | 'processpool': ProcessPoolExecutor(5) 24 | } 25 | 26 | 27 | job_defaults = { 28 | 'coalesce': False, 29 | 'max_instances': 3 30 | } 31 | 32 | # UTC as the scheduler’s timezone 33 | scheduler = BackgroundScheduler( 34 | jobstores=job_stores, 35 | executors=executors, 36 | job_defaults=job_defaults, 37 | timezone=utc, 38 | daemon=False 39 | ) 40 | 41 | 42 | def current_time(): 43 | import datetime 44 | return datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") 45 | 46 | 47 | def job1(): 48 | print 'job1 is running, Now is %s' % current_time() 49 | 50 | 51 | def job2(): 52 | print 'job2 is running, Now is %s' % current_time() 53 | 54 | 55 | # 每隔5秒运行一次job1,replace_existing=True防止添加重复,启动失败 56 | scheduler.add_job(job1, 'interval', seconds=5, id='job1', replace_existing=True) 57 | # 每隔5秒运行一次job2 58 | scheduler.add_job(job2, 'cron', second='*/5', id='job2', replace_existing=True) 59 | scheduler.start() 60 | print scheduler.get_jobs() 61 | -------------------------------------------------------------------------------- /scheduler_task/study_apscheduler/examples/executors/process_pool.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Time : 
17-8-13 上午10:56 4 | # @Author : Tom.Lee 5 | # @CopyRight : 2016-2017 6 | # @File : process_pool.py 7 | # @Product : PyCharm 8 | 9 | from datetime import datetime 10 | import os 11 | 12 | from apscheduler.schedulers.blocking import BlockingScheduler 13 | 14 | 15 | def tick(): 16 | print('Tick! The time is: %s' % datetime.now()) 17 | 18 | 19 | if __name__ == '__main__': 20 | scheduler = BlockingScheduler() 21 | scheduler.add_executor('processpool') 22 | scheduler.add_job(tick, 'interval', seconds=3) 23 | print('Press Ctrl+{0} to exit'.format('Break' if os.name == 'nt' else 'C')) 24 | 25 | try: 26 | scheduler.start() 27 | except (KeyboardInterrupt, SystemExit): 28 | pass -------------------------------------------------------------------------------- /scheduler_task/study_apscheduler/examples/executors/simple.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Time : 17-7-22 上午10:29 4 | # @Author : Tom.Lee 5 | # @CopyRight : 2016-2017 6 | # @File : t.py 7 | # @Product : PyCharm 8 | 9 | 10 | import datetime 11 | 12 | from apscheduler.executors.pool import ThreadPoolExecutor, ProcessPoolExecutor 13 | from apscheduler.schedulers.blocking import BlockingScheduler 14 | 15 | scheduler = BlockingScheduler() 16 | 17 | def current_time(): 18 | return datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") 19 | 20 | 21 | # ###################使用add_job运行任务################# 22 | 23 | def job1(): 24 | print 'job1 is running, Now is %s' % current_time() 25 | 26 | 27 | def job2(): 28 | print 'job2 is running, Now is %s' % current_time() 29 | 30 | 31 | # 每隔5秒运行一次job1 32 | scheduler.add_job(job1, 'interval', seconds=5, id='job1') 33 | # 每隔5秒运行一次job2 34 | scheduler.add_job(job2, 'cron', second='*/5', id='job2') 35 | 36 | 37 | # ###################使用装饰器添加任务################# 38 | 39 | # 每隔5秒运行一次job3 40 | @scheduler.scheduled_job('interval', seconds=5, id='job3') 41 | def job3(): 42 | print 'job3 
is running, Now is %s' % current_time() 43 | 44 | 45 | # 每隔5秒运行一次job4 46 | @scheduler.scheduled_job('cron', second='*/5', id='job4') 47 | def job4(): 48 | print 'job4 is running, Now is %s' % current_time() 49 | 50 | 51 | executors = { 52 | 'processpool': ProcessPoolExecutor(5), 53 | 'default': ThreadPoolExecutor(20) 54 | 55 | } 56 | job_defaults = { 57 | 'coalesce': False, 58 | 'max_instances': 5 59 | } 60 | scheduler.configure(executors=executors, job_defaults=job_defaults) 61 | scheduler.start() 62 | -------------------------------------------------------------------------------- /scheduler_task/study_apscheduler/examples/jobstores/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Time : 17-8-13 上午11:09 4 | # @Author : Tom.Lee 5 | # @CopyRight : 2016-2017 OpenBridge by yihecloud 6 | # @File : __init__.py.py 7 | # @Product : PyCharm 8 | -------------------------------------------------------------------------------- /scheduler_task/study_apscheduler/examples/jobstores/job_store.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Time : 17-8-13 上午11:14 4 | # @Author : Tom.Lee 5 | # @CopyRight : 2016-2017 6 | # @File : mongodb.py 7 | # @Product : PyCharm 8 | # @Docs : 9 | 10 | 11 | from datetime import datetime, timedelta 12 | import sys 13 | import os 14 | 15 | from apscheduler.schedulers.blocking import BlockingScheduler 16 | 17 | 18 | def alarm(time): 19 | print('Alarm! This alarm was scheduled at %s.' 
% time) 20 | 21 | 22 | if __name__ == '__main__': 23 | scheduler = BlockingScheduler() 24 | # ################ mongodb 25 | scheduler.add_jobstore('mongodb', collection='example_jobs') 26 | 27 | if len(sys.argv) > 1 and sys.argv[1] == '--clear': 28 | scheduler.remove_all_jobs() 29 | 30 | alarm_time = datetime.now() + timedelta(seconds=10) 31 | scheduler.add_job(alarm, 'date', run_date=alarm_time, args=[datetime.now()]) 32 | print('To clear the alarms, run this example with the --clear argument.') 33 | print('Press Ctrl+{0} to exit'.format('Break' if os.name == 'nt' else 'C')) 34 | 35 | try: 36 | scheduler.start() 37 | except (KeyboardInterrupt, SystemExit): 38 | pass 39 | -------------------------------------------------------------------------------- /scheduler_task/study_apscheduler/examples/jobstores/log.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Time : 17-8-13 上午11:29 4 | # @Author : Tom.Lee 5 | # @CopyRight : 2016-2017 OpenBridge by yihecloud 6 | # @File : log.py 7 | # @Product : PyCharm 8 | # @Docs : 9 | # @Source : 10 | import logging 11 | 12 | log = logging.getLogger('apscheduler.executors.default') 13 | log.setLevel(logging.INFO) # DEBUG 14 | # 设定日志格式 15 | fmt = logging.Formatter('%(levelname)s:%(name)s:%(message)s') 16 | handler = logging.StreamHandler() 17 | handler.setFormatter(fmt) 18 | log.addHandler(handler) 19 | -------------------------------------------------------------------------------- /scheduler_task/study_apscheduler/examples/schedules/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Time : 17-8-13 上午11:39 4 | # @Author : Tom.Lee 5 | # @CopyRight : 2016-2017 OpenBridge by yihecloud 6 | # @File : __init__.py.py 7 | # @Product : PyCharm 8 | # @Docs : 9 | # @Source : 10 | 11 | 12 | 
-------------------------------------------------------------------------------- /scheduler_task/study_apscheduler/examples/schedules/schdule.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Time : 17-8-13 上午11:40 4 | # @Author : Tom.Lee 5 | # @CopyRight : 2016-2017 OpenBridge by yihecloud 6 | # @File : asyncio.py 7 | # @Product : PyCharm 8 | # @Docs : 9 | 10 | import os 11 | import time 12 | from datetime import datetime 13 | 14 | 15 | def asyncio_schedule(): 16 | """ 17 | python version >= 3.4.0 18 | :return: 19 | """ 20 | from apscheduler.schedulers.asyncio import AsyncIOScheduler 21 | try: 22 | import asyncio 23 | except ImportError: 24 | import trollius as asyncio 25 | 26 | def tick(): 27 | print('Tick! The time is: %s' % datetime.now()) 28 | 29 | scheduler = AsyncIOScheduler() 30 | scheduler.add_job(tick, 'interval', seconds=3) 31 | scheduler.start() 32 | print('Press Ctrl+{0} to exit'.format('Break' if os.name == 'nt' else 'C')) 33 | 34 | # Execution will block here until Ctrl+C (Ctrl+Break on Windows) is pressed. 35 | try: 36 | asyncio.get_event_loop().run_forever() 37 | except (KeyboardInterrupt, SystemExit): 38 | pass 39 | 40 | 41 | def background_schedule(): 42 | from apscheduler.schedulers.background import BackgroundScheduler 43 | 44 | def tick(): 45 | print('Tick! The time is: %s' % datetime.now()) 46 | 47 | scheduler = BackgroundScheduler() 48 | scheduler.add_job(tick, 'interval', seconds=3) 49 | scheduler.start() 50 | print('Press Ctrl+{0} to exit'.format('Break' if os.name == 'nt' else 'C')) 51 | 52 | try: 53 | # This is here to simulate application activity (which keeps the main thread alive). 
54 | while True: 55 | time.sleep(2) 56 | except (KeyboardInterrupt, SystemExit): 57 | # Not strictly necessary if daemonic mode is enabled but should be done if possible 58 | scheduler.shutdown() 59 | 60 | 61 | def blocking_schedule(): 62 | from apscheduler.schedulers.blocking import BlockingScheduler 63 | 64 | def tick(): 65 | print('Tick! The time is: %s' % datetime.now()) 66 | 67 | scheduler = BlockingScheduler() 68 | scheduler.add_job(tick, 'interval', seconds=3) 69 | print('Press Ctrl+{0} to exit'.format('Break' if os.name == 'nt' else 'C')) 70 | 71 | try: 72 | scheduler.start() 73 | except (KeyboardInterrupt, SystemExit): 74 | pass 75 | 76 | 77 | def gevent_schedule(): 78 | from apscheduler.schedulers.gevent import GeventScheduler 79 | 80 | def tick(): 81 | print('Tick! The time is: %s' % datetime.now()) 82 | 83 | scheduler = GeventScheduler() 84 | scheduler.add_job(tick, 'interval', seconds=3) 85 | g = scheduler.start() # g is the greenlet that runs the scheduler loop 86 | print('Press Ctrl+{0} to exit'.format('Break' if os.name == 'nt' else 'C')) 87 | 88 | # Execution will block here until Ctrl+C (Ctrl+Break on Windows) is pressed. 89 | try: 90 | g.join() 91 | except (KeyboardInterrupt, SystemExit): 92 | pass 93 | 94 | 95 | def qt_schedule(): 96 | import signal 97 | import sys 98 | from apscheduler.schedulers.qt import QtScheduler 99 | 100 | try: 101 | from PyQt5.QtWidgets import QApplication, QLabel 102 | except ImportError: 103 | try: 104 | from PyQt4.QtGui import QApplication, QLabel 105 | except ImportError: 106 | from PySide.QtGui import QApplication, QLabel 107 | 108 | def tick(): 109 | label.setText('Tick! 
The time is: %s' % datetime.now()) 110 | 111 | app = QApplication(sys.argv) 112 | 113 | # This enables processing of Ctrl+C keypresses 114 | signal.signal(signal.SIGINT, lambda *args: QApplication.quit()) 115 | 116 | label = QLabel('The timer text will appear here in a moment!') 117 | label.setWindowTitle('QtScheduler example') 118 | label.setFixedSize(280, 50) 119 | label.show() 120 | 121 | scheduler = QtScheduler() 122 | scheduler.add_job(tick, 'interval', seconds=3) 123 | scheduler.start() 124 | 125 | # Execution will block here until the user closes the windows or Ctrl+C is pressed. 126 | app.exec_() 127 | 128 | 129 | def tornado_schedule(): 130 | from tornado.ioloop import IOLoop 131 | from apscheduler.schedulers.tornado import TornadoScheduler 132 | 133 | def tick(): 134 | print('Tick! The time is: %s' % datetime.now()) 135 | 136 | scheduler = TornadoScheduler() 137 | scheduler.add_job(tick, 'interval', seconds=3) 138 | scheduler.start() 139 | print('Press Ctrl+{0} to exit'.format('Break' if os.name == 'nt' else 'C')) 140 | 141 | # Execution will block here until Ctrl+C (Ctrl+Break on Windows) is pressed. 142 | try: 143 | IOLoop.instance().start() 144 | except (KeyboardInterrupt, SystemExit): 145 | pass 146 | 147 | 148 | def twisted_schedule(): 149 | from twisted.internet import reactor 150 | from apscheduler.schedulers.twisted import TwistedScheduler 151 | 152 | def tick(): 153 | print('Tick! The time is: %s' % datetime.now()) 154 | 155 | scheduler = TwistedScheduler() 156 | scheduler.add_job(tick, 'interval', seconds=3) 157 | scheduler.start() 158 | print('Press Ctrl+{0} to exit'.format('Break' if os.name == 'nt' else 'C')) 159 | 160 | # Execution will block here until Ctrl+C (Ctrl+Break on Windows) is pressed. 
161 | try: 162 | reactor.run() 163 | except (KeyboardInterrupt, SystemExit): 164 | pass 165 | 166 | 167 | if __name__ == '__main__': 168 | # gevent_schedule() 169 | # twisted_schedule() 170 | tornado_schedule() 171 | print 123 172 | pass 173 | -------------------------------------------------------------------------------- /scheduler_task/study_apscheduler/tutorials/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Time : 17-8-13 上午10:56 4 | # @Author : Tom.Lee 5 | # @CopyRight : 2016-2017 OpenBridge by yihecloud 6 | # @File : __init__.py.py 7 | # @Product : PyCharm 8 | -------------------------------------------------------------------------------- /set.md: -------------------------------------------------------------------------------- 1 | # Python set集合 2 | python的set集合是一组无序排列,可哈希的值,和字典的key是一样的,它初始化时会声明一个空列表,然后将元素加入, 3 | 所以使用终端打印时,set返回结果是`set([])` 4 | * 支持:集合关系测试,成员关系测试,可迭代 5 | * 不支持:索引,元素获取,切片 6 | * 类型:可变 `set()` , 不可变 `frozenset()` 7 | 8 | # 语法 9 | ### 声明 10 | 因为set()集合返回的是以列表的形式返回,所以初始化时,如果直接赋值字符串,它会把字符串转换为列表,然后加入 11 | * `s=set()` :声明一个空set集合 >> set([]) 12 | * `s=set('abc')` :声明并赋值set集合 >> set(['a','c','b']) 13 | * `s=set(['abc'])`:声明并赋值set集合 >> set(['abc']) 14 | 15 | ### 添加 16 | 因为set是无序唯一集合,所以添加重复元素是不能再次加入的 17 | * `set.add(val)` :添加元素到set集合 18 | * `set.update(val)` :添加的对象必须是可以迭代的对象,str,[],{}等,不能是int类型,它会首先把对象进行迭代然后加入到set集合中, 19 | 注意的是,对于{}字典类型,它只会操作keys. 
20 | ```python 21 | >>> s=set() 22 | >>> s.add(1) 23 | >>> s 24 | set([1]) 25 | >>> s.update('abc') 26 | >>> s 27 | set(['a', 1, 'c', 'b']) 28 | >>> s.update([90,100]) 29 | >>> s 30 | set(['a', 1, 'c', 'b', 100, 90]) 31 | >>> s.update({'key1':1,'key2':2}) 32 | >>> s 33 | set(['a', 1, 'c', 'b', 100, 'key2', 'key1', 90]) 34 | >>> s.add(1) 35 | >>> s 36 | set(['a', 1, 'c', 'b', 100, 'key2', 'key1', 90]) 37 | ``` 38 | ### 删除 39 | `s.remove(val)`使用remove()方法进行元素的删除 40 | 41 | ### 集合操作 42 | 集合的交集、合集(并集)、差集 43 | ``` 44 | 举例:a=set([1,2,3]) ,b=set([3,4,5]) 45 | python符号 含义 举例 46 | - 差集,相对补集 a-b >>set([1, 2]) 47 | & 交集 a&b >>set([3]) 48 | | 合集,并集 a|b >>set([1, 2, 3, 4, 5]) 49 | != 不等于 a!=b >>True 50 | == 等于 a==b >>False 51 | in 是成员 1 in a >> True 52 | not in 不是成员 1 not in a >> False 53 | 54 | ``` 55 | 56 | 57 | 58 | -------------------------------------------------------------------------------- /skills/README.md: -------------------------------------------------------------------------------- 1 | # python 中一些常用的技巧 2 | 3 | ##### 转义 4 | * Python中`"%"`的转义是`"%%"` 5 | 6 | ##### float类型保留小数: 7 | ```python 8 | b = float('%0.6f'%0.12345678) 9 | print b #0.123457 10 | ``` 11 | 12 | ##### url检测 13 | ```python 14 | url = 'https://www.baidu.com/' 15 | 16 | print url.rstrip('/') 17 | print url.rstrip('/')+'/home' 18 | ``` 19 | 20 | ##### url 转换函数 21 | ```python 22 | import urlparse 23 | urlparse.urljoin('http://www.aa.com:90/aa.html', '/abc.html') 24 | # http://www.aa.com:90/abc.html 25 | 26 | 27 | from __future__ import print_function 28 | 29 | import urlparse 30 | 31 | u = urlparse.urlparse('029_t002661t9gt.321002.2.ts?index=29&start=310000&end=320400') 32 | query_params = dict([s.split('=') for s in u.query.split('&')]) 33 | print('query_params : {}'.format(query_params)) 34 | # query_params : {'index': '29', 'end': '320400', 'start': '310000'} 35 | ``` 36 | 37 | ##### 去空格 38 | ```python 39 | s = ' 1 2 3 4 5 6 ' 40 | 41 | print '|%s|' % s.lstrip(' ') # 去除左边空格 |1 2 3 4 5 6 | 42 | print 
'|%s|' % s.rstrip(' ') # 去除右边空格 | 1 2 3 4 5 6| 43 | print '|%s|' % s.strip(' ') # 去除两边空格 |1 2 3 4 5 6| 44 | print '|%s|' % s.replace(' ', '') # 去除所有空格 |123456| 45 | ``` 46 | 47 | ##### 显示有限的接口到外部 48 | ``` 49 | 当发布python第三方package时, 并不希望代码中所有的函数或者class可以被外部import, 50 | 在__init__.py中添加__all__属性, 51 | 该list中填写可以import的类或者函数名, 可以起到限制的import的作用, 防止外部import其他函数或者类 52 | ``` 53 | 54 | ```python 55 | #!/usr/bin/env python 56 | # -*- coding: utf-8 -*- 57 | from base import utils 58 | 59 | __all__ = ['utils'] 60 | ``` 61 | 62 | --- 63 | ## Python博客 64 | * 关于`raw_input()` 和 `input()` :http://www.cnblogs.com/way_testlife/archive/2011/03/29/1999283.html 65 | 66 | 67 | -------------------------------------------------------------------------------- /skills/async_call.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: UTF-8 -*- 3 | 4 | import logging 5 | import Queue 6 | import threading 7 | 8 | 9 | def func_a(a, b): 10 | return a + b 11 | 12 | 13 | def func_b(): 14 | pass 15 | 16 | 17 | def func_c(a, b, c): 18 | return a, b, c 19 | 20 | 21 | # 异步任务队列 22 | _task_queue = Queue.Queue() 23 | 24 | 25 | def async_call(function, callback, *args, **kwargs): 26 | _task_queue.put({ 27 | 'function': function, 28 | 'callback': callback, 29 | 'args': args, 30 | 'kwargs': kwargs 31 | }) 32 | 33 | 34 | def _task_queue_consumer(): 35 | """ 36 | 异步任务队列消费者 37 | """ 38 | print '_task_queue_consumer' 39 | while True: 40 | try: 41 | task = _task_queue.get() 42 | function = task.get('function') 43 | callback = task.get('callback') 44 | args = task.get('args') 45 | kwargs = task.get('kwargs') 46 | try: 47 | if callback: 48 | callback(function(*args, **kwargs)) 49 | except Exception as ex: 50 | logging.error(ex) 51 | if callback: 52 | callback(ex) 53 | finally: 54 | _task_queue.task_done() 55 | except Exception as ex: 56 | # logging.warning(ex) 57 | logging.error(ex),"""***************""" 58 | 59 | 60 | def 
handle_result(result): 61 | print result 62 | print(type(result), result) 63 | 64 | 65 | if __name__ == '__main__': 66 | t = threading.Thread(target=_task_queue_consumer) 67 | t.daemon = True 68 | t.start() 69 | 70 | async_call(func_a, handle_result, 1, 2) 71 | async_call(func_b, handle_result) 72 | async_call(func_c, handle_result, 1, 2, 3) 73 | async_call(func_c, handle_result, 1, 2, 3, 4) 74 | 75 | _task_queue.join() 76 | -------------------------------------------------------------------------------- /skills/download_music.py: -------------------------------------------------------------------------------- 1 | # -*- coding=utf-8 -*- 2 | 3 | import requests 4 | import json 5 | import urllib 6 | import urllib2 7 | 8 | # 查看外链生成器的 url 方法 9 | data = requests.post( 10 | url='http://music.163.com/weapi/song/enhance/player/url', 11 | data={ 12 | 'params': 'iPslhRDpIz9eXPL6tuauyLF7NSU7yIetfu0vlx7lzfhlZGm21DICXpBCTmAqdiznd6LMnR6bGegIXGWyjNcHaOpjUC4E9ZLNt42hmQnA6QnYwitYsvs6CrKuXFp8pCJb', 13 | 'encSecKey': '47911c978b596e8c832e76ae96c0660ef6380d7f9e71c56e3ce7d90cf6978b385a6c5cba169cdf74d39cecae564cdaedfbc28e65cef01fbaeb3e0d27c228d6b0a63151ecb2d16a920eb37608d173c5824aa689dbfdb4fce2877df3702eb70cff009a20b84f94ca581b09f0c4840d51881af7702cf07a26e8a16f647739006ff0' 14 | }, 15 | ) 16 | 17 | # print json.loads(data.content) 18 | if data.content: 19 | url = json.loads(data.content).get('data')[0].get('url') 20 | print url 21 | # download 1 22 | # urllib.urlretrieve(url, url.split('/')[-1]) 23 | 24 | # download 2 25 | # r = requests.get(url) 26 | # with open(url.split('/')[-1], "wb") as code: 27 | # code.write(r.content) 28 | 29 | # download 3 30 | f = urllib2.urlopen(url) 31 | with open(url.split('/')[-1], "wb") as code: 32 | code.write(f.read()) 33 | -------------------------------------------------------------------------------- /skills/httpserver.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: 
# utf-8 -*-
# @Time : 17-4-24 2:22 PM
# @Author : Tom.Lee
# @Description :
# @File : httpserver.py
# @Product : PyCharm

"""
Python servers: a minimal WSGI application and a select()-based TCP proxy.
"""
import socket
import select
import sys
from wsgiref.simple_server import make_server

# Use the standard library's wsgiref to run a stand-alone server on a port.


def simple_app(environ, start_response):
    """Answer every request with a plain-text greeting.

    :param environ: WSGI environment dict (not inspected).
    :param start_response: WSGI callable taking the status line and headers.
    :return: the response body as a list holding one byte string.
    """
    status = '200 OK'
    response_headers = [('Content-type', 'text/plain')]
    start_response(status, response_headers)
    # WSGI bodies must be bytes; a b'' literal is a plain str on Python 2,
    # so this stays backward-compatible there.
    return [b'Hello world!\n']


# httpd = make_server('', 10001, simple_app)
# httpd.serve_forever()

# Proxy server.

to_addr = ('127.0.0.1', 10001)  # address that traffic is forwarded to


class Proxy(object):
    """Single-threaded TCP proxy that relays every client to ``to_addr``.

    ``inputs`` is the list of sockets handed to ``select()``; ``route`` maps
    each connected socket to its peer (client <-> forward), in both
    directions.
    """

    def __init__(self, addr):
        """Bind the listening socket.

        :param addr: ``(host, port)`` tuple the proxy listens on.
        """
        self.proxy = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.proxy.bind(addr)
        self.proxy.listen(10)
        self.inputs = [self.proxy]
        self.route = {}

    def serve_forever(self):
        """Relay data between peers until the process is interrupted."""
        print('proxy listen...')
        while True:
            readable, _, _ = select.select(self.inputs, [], [])
            self._handle_readable(readable)

    def _handle_readable(self, readable):
        """Process one batch of sockets reported readable by select()."""
        for sock in readable:
            # on_join()/on_quit() read the socket being handled from self.sock.
            self.sock = sock
            if sock is self.proxy:
                self.on_join()
            elif sock in self.route:
                self._relay()
            # Otherwise the socket was closed by on_quit() earlier in this
            # same batch (its peer hung up first); nothing left to do.

    def _relay(self):
        """Forward pending data from self.sock to its peer, or drop the pair."""
        try:
            data = self.sock.recv(8096)
        except socket.error:
            data = b''  # treat a reset connection like an orderly close
        if not data:
            self.on_quit()
            return
        try:
            # sendall() retries until everything is written; send() may
            # silently transmit only part of the buffer.
            self.route[self.sock].sendall(data)
        except socket.error:
            self.on_quit()

    def on_join(self):
        """Accept a new client and connect it to the forward address."""
        client, addr = self.proxy.accept()
        print('%s connect' % (addr,))
        forward = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            forward.connect(to_addr)
        except socket.error as exc:
            # An unreachable backend must not leak the client or stop the loop.
            print('%s forward failed: %s' % (addr, exc))
            forward.close()
            client.close()
            return
        self.inputs += [client, forward]
        self.route[client] = forward
        self.route[forward] = client

    def on_quit(self):
        """Close self.sock and its peer and forget both of them."""
        for s in self.sock, self.route[self.sock]:
            self.inputs.remove(s)
            del self.route[s]
            s.close()


if __name__ == '__main__':
    try:
        Proxy(('', 12345)).serve_forever()  # address the proxy listens on
    except KeyboardInterrupt:
        sys.exit(1)

# --------------------------------------------------------------------------------
# /standard_library/__init__.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2017/4/29 14:43
# @Author : Aries
# @Site :
# @File : __init__.py.py
# @Software: PyCharm

# --------------------------------------------------------------------------------
# /standard_library/email/message_html.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 17-8-29 2:33 PM
# @Author : Tom.Lee
# @CopyRight : 2016-2017 OpenBridge by yihecloud
# @File : html_message.py
# @Product : PyCharm
# @Docs :
# @Source :

import smtplib

from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText

# #config
FROM_ADDRESS = ''  # sender's address
EMAIL_PWD = ''  # password
TO_ADDRESSES = ''  # recipient's email address
SMTP_HOST = ''
SMTP_PORT = 25

# Create message container - the correct MIME type is multipart/alternative.
msg = MIMEMultipart('alternative')
msg['Subject'] = "Link"
msg['From'] = FROM_ADDRESS
msg['To'] = TO_ADDRESSES

# Create the body of the message (a plain-text and an HTML version).
text = "Hi!\nHow are you?\nHere is the link you wanted:\nhttps://www.python.org"
# The HTML alternative must carry real markup; without the tags it is just a
# second copy of the plain text and the link is lost.
html = """\
<html>
  <head></head>
  <body>
    <p>Hi!<br>
       How are you?<br>
       Here is the <a href="https://www.python.org">link</a> you wanted.
    </p>
  </body>
</html>
"""

# Record the MIME types of both parts - text/plain and text/html.
part1 = MIMEText(text, 'plain')
part2 = MIMEText(html, 'html')

# Attach parts into message container.
# According to RFC 2046, the last part of a multipart message, in this case
# the HTML message, is best and preferred.
msg.attach(part1)
msg.attach(part2)

# Send the message via the configured SMTP server.
s = smtplib.SMTP()
s.connect(host=SMTP_HOST, port=SMTP_PORT)
try:
    s.login(FROM_ADDRESS, EMAIL_PWD)

    # sendmail function takes 3 arguments: sender's address, recipient's address
    # and message to send - here it is sent as one string.
    s.sendmail(FROM_ADDRESS, [TO_ADDRESSES], msg.as_string())
finally:
    # Always end the SMTP session, even when login or delivery fails.
    s.quit()

# --------------------------------------------------------------------------------
# /standard_library/email/message_text.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 17-8-28 4:31 PM
# @Author : Tom.Lee
# @File : text_message.py
# @Product : PyCharm
# @Source :

"""Create and send a simple text message."""
import smtplib
from email.mime.text import MIMEText

# # config email
me = ''
you = ''
smtp_host = ''
smtp_port = 25
passwd = ''
textfile = 'textfile'

# Open a plain text file for reading. For this example, assume that
# the text file contains only ASCII characters.
with open(textfile, 'rb') as fp:
    # Create a text/plain message. The subtype must be 'plain'; 'text' would
    # produce the non-existent content type "text/text".
    msg = MIMEText(fp.read(), 'plain', 'utf-8')

# me == the sender's email address
# you == the recipient's email address
msg['Subject'] = 'The contents of %s' % textfile
msg['From'] = me
msg['To'] = you

# Send the message via our own SMTP server, but don't include the
# envelope header.
def _build_attachment(path):
    """Return a MIME part wrapping the contents of the file at *path*.

    The content type is guessed from the file name.  Unknown or
    compressed files fall back to ``application/octet-stream`` and are
    Base64 encoded.  Every file is opened in a ``with`` block so the handle
    is released even when reading or MIME construction raises.
    """
    ctype, encoding = mimetypes.guess_type(path)
    if ctype is None or encoding is not None:
        # No guess could be made, or the file is encoded (compressed), so
        # use a generic bag-of-bits type.
        ctype = 'application/octet-stream'
    maintype, subtype = ctype.split('/', 1)

    if maintype == 'text':
        # Note: the charset is not detected; the platform default is used.
        with open(path) as fp:
            return MIMEText(fp.read(), _subtype=subtype)

    with open(path, 'rb') as fp:
        payload = fp.read()
    if maintype == 'image':
        return MIMEImage(payload, _subtype=subtype)
    if maintype == 'audio':
        return MIMEAudio(payload, _subtype=subtype)

    part = MIMEBase(maintype, subtype)
    part.set_payload(payload)
    # Encode the payload using Base64
    encoders.encode_base64(part)
    return part


def main():
    """Mail (or write to a file) every regular file of a directory.

    Command-line options are read from ``sys.argv``:

    * ``-d/--directory``  directory to send (default: current directory)
    * ``-o/--output``     write the composed message to FILE instead of
      sending it through the SMTP server on ``localhost``
    * ``-s/--sender``     value of the From: header (required)
    * ``-r/--recipient``  a To: header value, repeatable (at least one)

    Exits with status 1 after printing the help text when the sender or
    the recipients are missing.
    """
    parser = OptionParser(usage="""\
Send the contents of a directory as a MIME message.

Usage: %prog [options]

Unless the -o option is given, the email is sent by forwarding to your local
SMTP server, which then does the normal delivery process. Your local machine
must be running an SMTP server.
""")
    parser.add_option('-d', '--directory',
                      type='string', action='store',
                      help="""Mail the contents of the specified directory,
                      otherwise use the current directory. Only the regular
                      files in the directory are sent, and we don't recurse to
                      subdirectories.""")
    parser.add_option('-o', '--output',
                      type='string', action='store', metavar='FILE',
                      help="""Print the composed message to FILE instead of
                      sending the message to the SMTP server.""")
    parser.add_option('-s', '--sender',
                      type='string', action='store', metavar='SENDER',
                      help='The value of the From: header (required)')
    parser.add_option('-r', '--recipient',
                      type='string', action='append', metavar='RECIPIENT',
                      default=[], dest='recipients',
                      help='A To: header value (at least one required)')
    opts, args = parser.parse_args()
    if not opts.sender or not opts.recipients:
        parser.print_help()
        sys.exit(1)
    directory = opts.directory or '.'

    # Create the enclosing (outer) message
    outer = MIMEMultipart()
    outer['Subject'] = 'Contents of directory %s' % os.path.abspath(directory)
    outer['To'] = COMMASPACE.join(opts.recipients)
    outer['From'] = opts.sender
    outer.preamble = 'You will not see this in a MIME-aware mail reader.\n'

    for filename in os.listdir(directory):
        path = os.path.join(directory, filename)
        if not os.path.isfile(path):
            continue
        msg = _build_attachment(path)
        # Set the filename parameter
        msg.add_header('Content-Disposition', 'attachment', filename=filename)
        outer.attach(msg)

    # Now send or store the message
    composed = outer.as_string()
    if opts.output:
        with open(opts.output, 'w') as fp:
            fp.write(composed)
    else:
        s = smtplib.SMTP('localhost')
        try:
            s.sendmail(opts.sender, opts.recipients, composed)
        finally:
            # Always end the SMTP session, even when delivery fails.
            s.quit()


if __name__ == '__main__':
    main()
lock = multiprocessing.Lock()  # serialises console output between workers


def _init_worker(shared_lock):
    """Install the parent's lock in a freshly started pool worker.

    With the "spawn" start method each worker re-imports this module and
    would otherwise create its own, unrelated lock.
    """
    global lock
    lock = shared_lock


def a(x):
    """Simulate a task that takes *x* seconds, logging its start and end.

    :param x: number of seconds to sleep
    :return: None

    The two messages are printed while holding ``lock`` so that lines from
    concurrent workers are not interleaved.  ``with`` guarantees the lock is
    released even if printing raises.
    """
    with lock:
        print('开始进程: %s 模拟进程时间: %s' % (os.getpid(), x))
    sleep(x)  # simulate doing the actual work
    with lock:
        print('结束进程: %s 预测下一个进程启动会使用该进程号' % os.getpid())


def run_pool(task_count=10, processes=3):
    """Run *task_count* random-length tasks on a pool of *processes* workers.

    Durations are random integers between 1 and 10 seconds.  The pool is
    always closed and joined so no worker processes are left behind.
    """
    durations = [random.randint(1, 10) for _ in range(task_count)]
    pool = multiprocessing.Pool(processes=processes,  # cap parallelism
                                initializer=_init_worker,
                                initargs=(lock,))
    try:
        pool.map(a, durations)
    finally:
        pool.close()
        pool.join()


# The guard keeps pool creation out of import time; without it, platforms
# that start workers by re-importing the main module spawn pools recursively.
if __name__ == '__main__':
    run_pool()
def build_parser():
    """Return the demo command-line parser.

    Options (stored under upper-case attribute names):

    * ``--user``       -> ``USER``, required
    * ``-H/--host``    -> ``HOST``, default ``'localhost'``
    * ``-P/--port``    -> ``PORT``, int, default ``3306``
    """
    parser = argparse.ArgumentParser(description='test argparse')
    parser.add_argument('--user', dest='USER', type=str,
                        required=True,
                        help='User Name')
    parser.add_argument('-H', '--host', dest='HOST', type=str,
                        default='localhost',
                        help='Server Ip Address')
    parser.add_argument('-P', '--port', dest='PORT', type=int,
                        default=3306,
                        help='Server Connection Port')
    return parser


if __name__ == '__main__':
    args = build_parser().parse_args()
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(args)
    # 'no' is not a defined option, so this demonstrates the getattr default.
    print(getattr(args, 'no', None))
    print(getattr(args, 'PORT', None))
def create_dir(path):
    """Create directory *path* (with missing parents) unless it exists.

    When *path* already exists nothing is created and a notice is printed.
    """
    if os.path.exists(path):
        print(u'文件夹%s 已经存在' % path)
        return
    os.makedirs(path)


def parent_dir(path):
    """Return *path* without its last '/'-separated component.

    One trailing '/' is ignored.  An empty string or a bare name yields ''.
    """
    # endswith() is safe for '' where indexing path[-1] would raise.
    if path.endswith('/'):
        path = path[:-1]
    return '/'.join(path.split('/')[:-1])


def del_dir(path):
    """Recursively delete the directory *path*; do nothing if it is absent.

    The tree is walked bottom-up so every directory is empty by the time it
    is removed.
    """
    if not os.path.isdir(path):
        return
    for root, dirs, files in os.walk(path, topdown=False):
        for name in files:
            os.remove(os.path.join(root, name))
        for name in dirs:
            entry = os.path.join(root, name)
            # os.walk lists symlinks to directories under ``dirs``; they must
            # be unlinked, since rmdir() on a symlink fails.
            if os.path.islink(entry):
                os.remove(entry)
            else:
                os.rmdir(entry)
    os.rmdir(path)


def create_file(name, mode='w', data=""):
    """Write *data* to the file *name*, creating parent directories first.

    :param name: path of the file, using '/' separators
    :param mode: mode passed to ``open``; defaults to ``'w'`` because the
        function always writes (a read-only default could never succeed)
    :param data: content to write

    Failures are reported on stdout instead of being raised (best effort).
    """
    try:
        parent_path = parent_dir(name)
        if parent_path and not os.path.exists(parent_path):
            create_dir(parent_path)
        with open(name, mode) as f:
            f.write(data)
    except Exception as e:
        print(u'%s 创建失败\n异常:%s' % (name, e))


def remove_file(file_path):
    """Delete *file_path*, silently ignoring any OSError (best effort)."""
    try:
        os.remove(file_path)
    except OSError:
        pass


def get_file_size(file_obj):
    """Return the size in bytes of a seekable file-like object.

    The current position is restored afterwards.  ``None`` is returned when
    the object has no ``seek``/``tell``, reports itself as not seekable, or
    seeking fails with ``ESPIPE`` (pipes, sockets).  Other I/O errors
    propagate.
    """
    if not (hasattr(file_obj, 'seek') and hasattr(file_obj, 'tell')):
        return None
    # Objects without seekable() (e.g. Python 2 ``file``) are assumed
    # seekable; the ESPIPE handler below covers the case where they are not.
    seekable = getattr(file_obj, 'seekable', None)
    if seekable is not None and not seekable():
        return None
    try:
        curr = file_obj.tell()
        file_obj.seek(0, os.SEEK_END)
        size = file_obj.tell()
        file_obj.seek(curr)
        return size
    except IOError as e:
        if e.errno == errno.ESPIPE:
            return None
        raise
def _append_query(url, params):
    """Return *url* with *params* url-encoded and appended as a query string.

    A '?' (or '&' when the URL already has a query) is inserted unless the
    URL already ends with one, so callers that pass 'path?' keep working.
    """
    if not params:
        return url
    query = urllib.urlencode(params)
    if url.endswith(('?', '&')):
        return url + query
    return url + ('&' if '?' in url else '?') + query


def request_get(host, port, url, params=None, headers=None, timeout=5):
    """Send an HTTP GET request and return ``(status, body)``.

    :param host: server host name or address
    :param port: server port
    :param url: request path
    :param params: optional mapping encoded into the query string
    :param headers: optional mapping of request headers
    :param timeout: socket timeout in seconds
    :return: tuple ``(status, data)``; a value stays ``None`` when the
        request failed before it was obtained (the error is printed)
    """
    status, http_clint, data = None, None, None
    try:
        http_clint = httplib.HTTPConnection(host, port, timeout=timeout)
        http_clint.request('GET', _append_query(url, params),
                           headers=headers or {})
        response = http_clint.getresponse()
        status = response.status
        data = response.read()
    except Exception as e:
        print(e)
    finally:
        if http_clint:
            http_clint.close()
    return status, data


def request_post(host, port, url, body=None, headers=None, timeout=5):
    """Send an HTTP POST request and return ``(status, body)``.

    :param host: server host name or address
    :param port: server port
    :param url: request path
    :param body: optional request body
    :param headers: optional mapping of request headers
    :param timeout: socket timeout in seconds
    :return: tuple ``(status, data)``; a value stays ``None`` when the
        request failed before it was obtained (the error is printed)
    """
    status, http_clint, data = None, None, None
    try:
        http_clint = httplib.HTTPConnection(host, port, timeout=timeout)
        # httplib iterates over the headers mapping, so None must become {}.
        http_clint.request('POST', url, body, headers or {})
        response = http_clint.getresponse()
        status = response.status
        data = response.read()
    except Exception as e:
        print('http post error :{0}'.format(e))
    finally:
        if http_clint:
            http_clint.close()
    return status, data
def config1():
    """Configure logging through ``logging.basicConfig`` and emit samples.

    The default level is set to DEBUG and a message format is installed.

    Caveat: ``basicConfig`` attaches a handler to the *root* logger.  Because
    loggers inherit from the root, this also changes the output of
    third-party modules that use ``logging`` in the same process.
    """
    logging.basicConfig(
        level=logging.DEBUG,
        format="[%(asctime)s] %(name)s:%(levelname)s: %(message)s"
    )

    # Emit one record per level.
    logging.debug('debug')
    logging.info('info')
    logging.warning('warn')  # warn() is a deprecated alias of warning()
    logging.error('error')
    logging.critical('critical')


def config2():
    """Configure the named logger ``'app_name'`` with a console handler.

    The logger level is set to DEBUG.  The stream handler is attached only
    when the logger has no handler yet, so calling this function repeatedly
    does not duplicate every log line.
    """
    logger = logging.getLogger('app_name')
    logger.setLevel(logging.DEBUG)
    if not logger.handlers:
        # StreamHandler writes records to the console (stderr by default).
        handler = logging.StreamHandler()
        formatter = logging.Formatter(
            '[%(asctime)s] %(name)s:%(levelname)s: %(message)s')
        handler.setFormatter(formatter)
        logger.addHandler(handler)

    logger.debug('debug message')
    logger.info('hello world')
def shell():
    """Run sample shell commands three different ways and print the results.

    Demonstrates ``os.system`` (exit status only), ``os.popen`` (captured
    output) and ``commands.getstatusoutput`` (status and output).
    """
    command_ls = 'ls -al /opt'
    command_docker = 'docker ps -a'

    # os.system(): output goes straight to the terminal, status is returned.
    ros = os.system(command_ls)
    print('\n\nos.system() :  %s' % ros)

    # os.popen(): read the command output through a pipe, then close it.
    pipe = os.popen(command_docker)
    try:
        result = pipe.read()
    finally:
        pipe.close()
    print('\n\nos.popen() :  %s' % result)

    # commands module (Python 2 only).
    (status, output) = commands.getstatusoutput(command_docker)
    print('\n\ncommands :  %s %s' % (status, output))


def deep_look_dir(dir_path, deep=1, console_full_path=False):
    """Print the tree of files below *dir_path*.

    :param dir_path: directory to list, e.g. ``os.path.dirname(__file__)``
    :param deep: current nesting depth (1 for the top-level call); it
        controls the indentation prefix
    :param console_full_path: print full paths instead of bare names
    :return: None

    Entries are listed in sorted order.  Only real directories are descended
    into; symlinks and special files are printed as leaves, which avoids
    crashing on broken links and looping on symlink cycles.
    """
    if deep == 1:
        print(dir_path)

    split_symbol = '|_' * deep if deep == 1 else '|' + ' ' * (deep - 1) + '|_'

    for f in sorted(os.listdir(dir_path)):
        f_path = os.path.join(dir_path, f)
        console_name = f_path if console_full_path else f

        if os.path.isdir(f_path) and not os.path.islink(f_path):
            print("{sp} {dir_path}/: ".format(
                sp=split_symbol,
                dir_path=console_name))
            deep_look_dir(f_path, deep + 1, console_full_path)
        else:
            print('%s %s' % (split_symbol, console_name))


if '__main__' == __name__:
    deep_look_dir('/root/pythonStudy')