├── .gitignore
├── LICENSE.md
├── OpenStack
    └── oslo_
    │   ├── README.md
    │   ├── config
    │       ├── config.conf
    │       ├── config_parser.py
    │       └── config_test.py
    │   └── i18n
    │       └── i18n_app
    │           ├── __init__.py
    │           ├── _i18n.py
    │           ├── locale
    │               ├── zh_CN
    │               │   └── LC_MESSAGES
    │               │   │   └── i18n_app.po
    │               └── zh_TW
    │               │   └── LC_MESSAGES
    │               │       └── i18n_app.po
    │           └── main.py
├── README.md
├── _config.yml
├── algorithms
    ├── algorithm_sorting.py
    └── question1.py
├── basic_grammar.md
├── class1_preliminary.md
├── class2_annotation.md
├── class3_inherit.md
├── class4_thorough.md
├── contributed_modules
    ├── mongodb
    │   └── mongodb_utils.py
    ├── mysql
    │   ├── mysqldb_
    │   │   ├── __init__.py
    │   │   ├── mysql_lock.py
    │   │   └── study_mysqldb.py
    │   └── sqlalchemy_
    │   │   ├── __init__.py
    │   │   ├── mysql_lock.py
    │   │   └── study_sqlalchemy.py
    ├── redis
    │   ├── README.md
    │   ├── redis_helper.py
    │   └── redis_test.py
    └── requests
    │   ├── README.md
    │   ├── __init__.py
    │   ├── restful.py
    │   ├── test.py
    │   └── utils.py
├── crawlers
    └── spider
    │   ├── __init__.py
    │   ├── downloader.py
    │   ├── main.py
    │   ├── parser.py
    │   ├── proxypools.py
    │   ├── tools.py
    │   ├── urlsmanager.py
    │   └── writer.py
├── data_analysis
    ├── __init__.py
    ├── academic_concept
    │   └── matrix_product.md
    ├── study_matplotlib
    │   ├── __init__.py
    │   ├── graphs
    │   │   ├── __init__.py
    │   │   ├── graphs_histogram.py
    │   │   ├── graphs_quadratic.py
    │   │   └── graphs_trigonometric.py
    │   ├── matplotlib_2d.py
    │   ├── png
    │   │   └── numpy.png
    │   ├── save_file
    │   │   ├── graphs_histogram.png
    │   │   ├── graphs_quadratic.png
    │   │   └── graphs_trigonometric.png
    │   └── test.py
    ├── study_mlab
    │   ├── __init__.py
    │   └── mlab_3d.py
    ├── study_numpy
    │   ├── __init__.py
    │   ├── _test.py
    │   ├── numpy_functions
    │   │   ├── np_arange.py
    │   │   ├── np_dot.py
    │   │   ├── np_mgrid_ogrid.py
    │   │   └── np_random.py
    │   ├── numpy_multidimensional.py
    │   ├── numpy_ndarray.py
    │   ├── numpy_polynomial_poly1d.py
    │   ├── numpy_ufunc.py
    │   └── png
    │   │   └── numpy.png
    └── study_tesseract
    │   ├── __init__.py
    │   ├── image
    │       └── 20170807142300.png
    │   └── test01.py
├── data_structure.md
├── decorator.md
├── dict.md
├── file.md
├── levenshtein.py
├── list.md
├── loop.md
├── page_parser
    ├── __init__.py
    ├── beautifulsoup
    │   ├── __init__.py
    │   ├── parser.py
    │   ├── test.py
    │   └── test_403.py
    └── xpath
    │   ├── __init__.py
    │   ├── file.txt
    │   └── test.py
├── rpc
    └── RPyC
    │   ├── demo.py
    │   └── tutorials
    │       ├── part01.py
    │       └── services
    │           └── registry_discovery
    │               ├── __init__.py
    │               ├── client_test.py
    │               └── service01.py
├── scheduler_task
    └── study_apscheduler
    │   ├── __init__.py
    │   ├── examples
    │       ├── demo.py
    │       ├── executors
    │       │   ├── __init__.py
    │       │   ├── configure.py
    │       │   ├── process_pool.py
    │       │   └── simple.py
    │       ├── jobstores
    │       │   ├── __init__.py
    │       │   ├── job_store.py
    │       │   └── log.py
    │       └── schedules
    │       │   ├── __init__.py
    │       │   └── schdule.py
    │   └── tutorials
    │       └── __init__.py
├── set.md
├── skills
    ├── README.md
    ├── async_call.py
    ├── download_music.py
    └── httpserver.py
├── standard_library
    ├── __init__.py
    ├── email
    │   ├── message_html.py
    │   ├── message_text.py
    │   ├── shell_mime.py
    │   └── textfile
    ├── process
    │   ├── __init__.py
    │   ├── process_pool.py
    │   └── simple_core.py
    ├── study_argparse.py
    ├── study_color_print.py
    ├── study_file.py
    ├── study_filter.py
    ├── study_httplib.py
    ├── study_itertools.py
    ├── study_logging.py
    ├── study_os.py
    ├── study_regular_expression.py
    ├── study_socket.py
    └── threads
    │   ├── __init__.py
    │   ├── demo_consumer_producer.py
    │   ├── my_thread_pool.py
    │   ├── rethread.py
    │   ├── thread_pool_test.py
    │   └── thread_pool_test2.py
└── use_package.md


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Created by .ignore support plugin (hsz.mobi)
  2 | ### Python template
  3 | # Byte-compiled / optimized / DLL files
  4 | __pycache__/
  5 | *.py[cod]
  6 | *$py.class
  7 | 
  8 | # C extensions
  9 | *.so
 10 | 
 11 | # Distribution / packaging
 12 | .Python
 13 | env/
 14 | build/
 15 | develop-eggs/
 16 | dist/
 17 | downloads/
 18 | eggs/
 19 | .eggs/
 20 | lib/
 21 | lib64/
 22 | parts/
 23 | sdist/
 24 | var/
 25 | wheels/
 26 | *.egg-info/
 27 | .installed.cfg
 28 | *.egg
 29 | 
 30 | # PyInstaller
 31 | #  Usually these files are written by a python script from a template
 32 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 33 | *.manifest
 34 | *.spec
 35 | 
 36 | # Installer logs
 37 | pip-log.txt
 38 | pip-delete-this-directory.txt
 39 | 
 40 | # Unit test / coverage reports
 41 | htmlcov/
 42 | .tox/
 43 | .coverage
 44 | .coverage.*
 45 | .cache
 46 | nosetests.xml
 47 | coverage.xml
 48 | *,cover
 49 | .hypothesis/
 50 | 
 51 | # Translations
 52 | *.mo
 53 | *.pot
 54 | 
 55 | # Django stuff:
 56 | *.log
 57 | local_settings.py
 58 | 
 59 | # Flask stuff:
 60 | instance/
 61 | .webassets-cache
 62 | 
 63 | # Scrapy stuff:
 64 | .scrapy
 65 | 
 66 | # Sphinx documentation
 67 | docs/_build/
 68 | 
 69 | # PyBuilder
 70 | target/
 71 | 
 72 | # Jupyter Notebook
 73 | .ipynb_checkpoints
 74 | 
 75 | # pyenv
 76 | .python-version
 77 | 
 78 | # celery beat schedule file
 79 | celerybeat-schedule
 80 | 
 81 | # SageMath parsed files
 82 | *.sage.py
 83 | 
 84 | # dotenv
 85 | .env
 86 | 
 87 | # virtualenv
 88 | .venv
 89 | venv/
 90 | ENV/
 91 | 
 92 | # Spyder project settings
 93 | .spyderproject
 94 | 
 95 | # Rope project settings
 96 | .ropeproject
 97 | 
 98 | .idea/
 99 | *.*~
100 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2017 tom.lee
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/OpenStack/oslo_/README.md:
--------------------------------------------------------------------------------
1 | # [oslo公共库](https://docs.openstack.org/oslo.config/latest/)
2 | >　OpenStack开源公共库
3 | 


--------------------------------------------------------------------------------
/OpenStack/oslo_/config/config.conf:
--------------------------------------------------------------------------------
 1 | [DEFAULT]
 2 | # DEFAULT 不可省略,必须大写
 3 | enabled_api = ec2, api_keystone, api_compute
 4 | bind_host = 196.168.1.111
 5 | bind_port = 9999
 6 | 
 7 | [RABBIT]
 8 | host = 127.0.0.1
 9 | port = 12345
10 | ssl = true
11 | username = guest
12 | password = guest


--------------------------------------------------------------------------------
/OpenStack/oslo_/config/config_parser.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # @Time           : 17-8-31 下午1:29
 4 | # @Author         : Tom.Lee
 5 | # @File           : config_parser.py
 6 | # @Product        : PyCharm
 7 | # @Docs           : 
 8 | # @Source         : 
 9 | 
10 | from oslo_config import cfg
11 | from oslo_config import types
12 | 
13 | 
14 | class ConfigManager(object):  # holds the option schemas and registers them on cfg.CONF
15 |     PortType = types.Integer(1, 65535)  # integer type bounded to the 1..65535 range
16 |     default_opts = [
17 |         cfg.StrOpt(
18 |             'bind_host',
19 |             default='0.0.0.0',
20 |             help='IP address to listen on.'),
21 |         cfg.Opt(
22 |             'bind_port',  # the generic Opt is used so that type=PortType can be given
23 |             type=PortType,
24 |             default=9292,
25 |             help='Port number to listen on.')
26 |     ]
27 |     default_opt = cfg.ListOpt(
28 |         'enabled_api',
29 |         default=['ec2', 'api_compute'],
30 |         help='List of APIs to enable by default.')
31 |     cli_opts = [
32 |         cfg.BoolOpt('verbose',
33 |                     short='v',
34 |                     default=False,
35 |                     help='Print more verbose output'),
36 |         cfg.BoolOpt('debug',
37 |                     short='d',
38 |                     default=False,
39 |                     help='Print debugging output'),
40 |     ]
41 |     rabbit_group = cfg.OptGroup(
42 |         name='RABBIT',
43 |         title='RABBIT options'
44 |     )
45 |     rabbit_opt = cfg.BoolOpt(
46 |         'ssl',
47 |         default=False,
48 |         help='use ssl for connection')
49 |     rabbit_opts = [
50 |         cfg.StrOpt(
51 |             'host',
52 |             default='localhost',
53 |             help='IP/hostname to listen on.'),
54 |         cfg.IntOpt(
55 |             'port',
56 |             default=5672,
57 |             help='Port number to listen on.')
58 |     ]
59 | 
60 |     def __init__(self):
61 |         self.conf = cfg.CONF  # the shared global ConfigOpts object, not a private copy
62 |         self._register_opts()
63 | 
64 |     def _register_opts(self):
65 |         # options without a group (the DEFAULT section)
66 |         self.conf.register_opt(self.default_opt)
67 |         self.conf.register_opts(self.default_opts)
68 |         # RABBIT group: the group is registered first, then its options
69 |         self.conf.register_group(self.rabbit_group)
70 |         self.conf.register_opts(self.rabbit_opts, self.rabbit_group)
71 |         self.conf.register_opt(self.rabbit_opt, self.rabbit_group)
72 |         # command-line options
73 |         self.conf.register_cli_opts(self.cli_opts)
74 |         self.conf(default_config_files=['config.conf'])  # load values from config.conf
75 | 
76 |     @property
77 |     def bind_port(self):
78 |         return getattr(self.conf, 'bind_port', None)  # None when the attribute lookup fails
79 | 
80 |     @property
81 |     def bind_host(self):
82 |         return getattr(self.conf, 'bind_host', None)  # None when the attribute lookup fails
83 | 
84 | 
85 | config_manager = ConfigManager()  # built at import time: registers the options and loads config.conf
86 | if __name__ == '__main__':
87 |     print config_manager.bind_port
88 |     print config_manager.bind_host
89 | 


--------------------------------------------------------------------------------
/OpenStack/oslo_/config/config_test.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | # @Time           : 17-8-31 上午10:40
  4 | # @Author         : Tom.Lee
  5 | # @File           : config_test.py
  6 | # @Product        : PyCharm
  7 | # @Docs           : 
  8 | # @Source         : https://github.com/openstack/oslo.config/blob/master/oslo_config/cfg.py
  9 | 
 10 | """
 11 | 配置文件中的选项(group, opts)，必须在代码中显式地注册，否则无法解析
 12 | """
 13 | 
 14 | from oslo_config import cfg
 15 | from oslo_config import types
 16 | 
 17 | # valid port range
 18 | PortType = types.Integer(1, 65535)
 19 | 
 20 | # a schema made of several options
 21 | default_opts = [
 22 |     cfg.StrOpt('bind_host',
 23 |                default='0.0.0.0',
 24 |                help='IP address to listen on.'),
 25 |     cfg.Opt('bind_port',  # the generic Opt is used so that type=PortType can be given
 26 |             type=PortType,
 27 |             default=9292,
 28 |             help='Port number to listen on.')
 29 | ]
 30 | 
 31 | # a schema consisting of a single option
 32 | default_opt = cfg.ListOpt('enabled_api',
 33 |                           default=['ec2', 'api_compute'],
 34 |                           help='List of APIs to enable by default.')
 35 | 
 36 | # command-line options
 37 | cli_opts = [
 38 |     cfg.BoolOpt('verbose',
 39 |                 short='v',
 40 |                 default=False,
 41 |                 help='Print more verbose output'),
 42 |     cfg.BoolOpt('debug',
 43 |                 short='d',
 44 |                 default=False,
 45 |                 help='Print debugging output'),
 46 | ]
 47 | 
 48 | # define the RABBIT option group
 49 | rabbit_group = cfg.OptGroup(
 50 |     name='RABBIT',
 51 |     title='RABBIT options'
 52 | )
 53 | # an option inside the group; by convention it is often prefixed with the group name (not required)
 54 | rabbit_opt = cfg.BoolOpt('ssl',
 55 |                          default=False,
 56 |                          help='use ssl for connection')
 57 | # several options belonging to the group
 58 | rabbit_opts = [
 59 |     cfg.StrOpt('host',
 60 |                default='localhost',
 61 |                help='IP/hostname to listen on.'),
 62 |     cfg.IntOpt('port',
 63 |                default=5672,
 64 |                help='Port number to listen on.')
 65 | ]
 66 | 
 67 | 
 68 | def register_default_opts(conf):
 69 |     """
 70 |     Register the options that belong to the default group on conf.
 71 |     """
 72 |     conf.register_opt(default_opt)
 73 |     conf.register_opts(default_opts)
 74 | 
 75 | 
 76 | def register_rabbit_group(conf):
 77 |     """
 78 |     Register the RABBIT group and its options on conf.
 79 |     """
 80 |     # register the group first, then the options that belong to it
 81 |     conf.register_group(rabbit_group)
 82 |     # register the list of options, naming the group they belong to
 83 |     conf.register_opts(rabbit_opts, rabbit_group)
 84 |     # register the single option in the same group
 85 |     conf.register_opt(rabbit_opt, rabbit_group)
 86 | 
 87 | 
 88 | def register_cli_opts(conf):
 89 |     """
 90 |     Register the command-line options.
 91 |     :param conf: configuration object the options are registered on
 92 |     :return: None
 93 |     """
 94 |     conf.register_cli_opts(cli_opts)
 95 | 
 96 | 
 97 | def get_bind_host(conf):
 98 |     """
 99 |     Read the bind_host option; getattr falls back to None if the attribute is missing.
100 |     """
101 |     return getattr(conf, 'bind_host', None)
102 | 
103 | 
104 | def get_bind_port(conf):
105 |     """
106 |     Read the bind_port option through plain attribute access (no fallback value).
107 |     """
108 |     return conf.bind_port
109 | 
110 | 
111 | def get_rabbit_username(conf):
112 |     """
113 |     'username' exists in the config file but was never registered in code, so it cannot be resolved.
114 |     """
115 |     return conf.RABBIT.username
116 | 
117 | 
118 | if __name__ == '__main__':
119 |     # use the global configuration object
120 |     config = cfg.CONF
121 |     # register the default, RABBIT and command-line options
122 |     register_default_opts(config)
123 |     register_rabbit_group(config)
124 |     register_cli_opts(config)
125 | 
126 |     # load the configuration file
127 |     config(default_config_files=['config.conf'])
128 |     print 'host:', get_bind_host(config)
129 |     # list_all_sections
130 |     for section in config.list_all_sections():
131 |         print section
132 | 
133 |     print config.RABBIT
134 |     print config.RABBIT.host
135 |     print get_rabbit_username(config)  # 'username' is not among the registered rabbit_opts
136 | 


--------------------------------------------------------------------------------
/OpenStack/oslo_/i18n/i18n_app/__init__.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # @Time           : 17-9-16 下午2:52
 4 | # @Author         : Tom.Lee
 5 | # @CopyRight      : 2016-2017 OpenBridge by yihecloud
 6 | # @File           : __init__.py
 7 | # @Product        : PyCharm
 8 | # @Docs           : 
 9 | # @Source         : 
10 | 
11 | 
12 | 


--------------------------------------------------------------------------------
/OpenStack/oslo_/i18n/i18n_app/_i18n.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # @Time           : 17-9-16 下午3:10
 4 | # @Author         : Tom.Lee
 5 | # @CopyRight      : 2016-2017 OpenBridge by yihecloud
 6 | # @File           : _i18n.py
 7 | # @Product        : PyCharm
 8 | # @Docs           : https://docs.openstack.org/oslo.i18n/latest/user/usage.html
 9 | # @Source         :
10 | 
11 | import oslo_i18n
12 | 
13 | DOMAIN = "i18n_app"  # translation domain, passed to TranslatorFactory and get_available_languages
14 | 
15 | _translators = oslo_i18n.TranslatorFactory(domain=DOMAIN)
16 | 
17 | # The primary translation function using the well-known name "_"
18 | _ = _translators.primary
19 | 
20 | # The contextual translation function using the name "_C"
21 | # requires oslo.i18n >=2.1.0
22 | _C = _translators.contextual_form
23 | 
24 | # The plural translation function using the name "_P"
25 | # requires oslo.i18n >=2.1.0
26 | _P = _translators.plural_form
27 | 
28 | # Translators for log levels.
29 | #
30 | # NOTE(dhellmann): This is not needed for new projects as of the
31 | # Pike series.
32 | #
33 | # The abbreviated names are meant to reflect the usual use of a short
34 | # name like '_'. The "L" is for "log" and the other letter comes from
35 | # the level.
36 | _LI = _translators.log_info
37 | _LW = _translators.log_warning
38 | _LE = _translators.log_error
39 | _LC = _translators.log_critical
40 | 
41 | 
42 | def get_available_languages():
43 |     """
44 |     Return the list of languages for which translations can currently be provided.
45 | 
46 |     # (author's note) all language packs are under /usr/local/lib/python2.7/dist-packages/babel/locale-data/
47 |     :return: the result of oslo_i18n.get_available_languages(DOMAIN)
48 |     """
49 |     return oslo_i18n.get_available_languages(DOMAIN)
50 | 
51 | 
52 | def translate(msg, user_locale='zh_CN'):
53 |     """
54 |     Translate "msg" into the given locale; user_locale defaults to 'zh_CN'.
55 | 
56 |     :param msg: the object to translate
57 |     :param user_locale: the locale to translate the message to, if None the
58 |                         default system locale will be used
59 |                         'en_US' 'zh_CN'
60 |     :returns: the translated object in unicode, or the original object if
61 |               it could not be translated
62 |     """
63 |     return oslo_i18n.translate(msg, user_locale)
64 | 
65 | 
66 | def enable_lazy(enable=True):  # thin pass-through to oslo_i18n.enable_lazy
67 |     return oslo_i18n.enable_lazy(enable)
68 | 


--------------------------------------------------------------------------------
/OpenStack/oslo_/i18n/i18n_app/main.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # @Time           : 17-9-16 下午2:53
 4 | # @Author         : Tom.Lee
 5 | # @CopyRight      : 2016-2017 OpenBridge by yihecloud
 6 | # @File           : main.py
 7 | # @Product        : PyCharm
 8 | # @Docs           : https://docs.openstack.org/oslo.i18n/latest/user/usage.html
 9 | # @Source         : 
10 | 
11 | from _i18n import get_available_languages
12 | 
13 | languages = get_available_languages()  # languages available for the "i18n_app" domain
14 | 
15 | print languages
16 | 
17 | 
18 | 
19 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Python Study
  2 | > 控制台打印乱码： **` print '你好，世界！'.decode('utf-8') `**
  3 | 
  4 | ```python
  5 | url = 'http://{0}:{1}/{2}'.format('0.0.0.0', 2375, 'xxx')
  6 | url = 'http://{ip}:{port}/{uri}'.format(ip='0.0.0.0', port=2375, uri='xxx')
  7 | url = 'http://%s:%d/%s' % ('0.0.0.0', 2375, 'xxx')
  8 | ```
  9 | 
 10 | ## Windows Python 依赖库[ **PythonLibs**](http://www.lfd.uci.edu/~gohlke/pythonlibs/)
 11 | * 1.找到对应的 `whl` 包下载
 12 | * 2.直接`pip install *.whl` 或者修改`.whl`文件为`.zip`文件，解压缩文件的`Python文件夹`复制到--`python`安装目录下的`Lib`--目录下
 13 | 
 14 | ## [Python 中文翻译文档集合](http://python.usyiyi.cn/)
 15 | ## [Python 官方文档](https://docs.python.org/2.7/)
 16 | ## [Top Python APIs](https://www.programcreek.com/python/index/module/list)
 17 | 
 18 | ## Python2.7环境变量
 19 | > 假如`sys.path`不对,则使用Python终端 ` sys.path = [...] `重新设置即可.
 20 | > 默认环境配置如下：
 21 | 
 22 | ```shell
 23 | root@node-40:~# python
 24 | Python 2.7.6 (default, Jun 22 2015, 17:58:13) 
 25 | [GCC 4.8.2] on linux2
 26 | Type "help", "copyright", "credits" or "license" for more information.
 27 | >>> import sys 
 28 | >>> sys.path
 29 | ['', '/usr/lib/python2.7', '/usr/lib/python2.7/plat-x86_64-linux-gnu', '/usr/lib/python2.7/lib-tk', '/usr/lib/python2.7/lib-old', '/usr/lib/python2.7/lib-dynload', '/usr/local/lib/python2.7/dist-packages', '/usr/lib/python2.7/dist-packages', '/usr/lib/python2.7/dist-packages/PILcompat', '/usr/lib/python2.7/dist-packages/gtk-2.0', '/usr/lib/pymodules/python2.7']
 30 | >>> 
 31 | ```
 32 | ```shell
 33 | # /etc/profile
 34 | 
 35 | export PYTHONPATH=/usr/lib/python2.7:/usr/lib/python2.7/plat-x86_64-linux-gnu:/usr/lib/python2.7/lib-tk:/usr/lib/python2.7/lib-old:/usr/lib/python2.7/lib-dynload:/usr/local/lib/python2.7/dist-packages:/usr/lib/python2.7/dist-packages:/usr/lib/python2.7/dist-packages/PILcompat:/usr/lib/python2.7/dist-packages/gtk-2.0:/usr/lib/pymodules/python2.7
 36 | export PATH=$PATH:$PYTHONPATH
 37 | ```
 38 | 
 39 | ## Windows环境`Python2.7`与`Python3.x` 共同使用
 40 | 
 41 | * Python2.7 : `$ py -2`
 42 | * Python3.x : `$ py -3`
 43 | * Python2.7 pip : `$ py -2 -m pip xxx`
 44 | * Python3.x pip : `$ pip3 xxx`
 45 | 
 46 | ## pycharm
 47 | > settings
 48 | 
 49 | * enable Code compatibility inspection: `settings` --> `code compatibility inspection`
 50 | 
 51 | ## Python内置工具
 52 | 
 53 | * 下载服务器：
 54 |   * Python2.x
 55 |      * `$ python -m SimpleHTTPServer` 默认端口8000
 56 |      * `$ py -2 -m SimpleHTTPServer` 默认端口8000
 57 |      * `$ py -2 -m SimpleHTTPServer 9090` 指定端口9090
 58 |      * 使用代码：
 59 |      ```python
 60 |      import SimpleHTTPServer
 61 | 
 62 |      SimpleHTTPServer.test()
 63 |      ```
 64 |   * Python3.x
 65 |      * `$ python -m http.server`
 66 |      * `$ py -3 -m http.server`
 67 | 
 68 | * Json格式化：`$ curl http://localhost:8080/get | python -m json.tool`
 69 | 
 70 | * 执行Python代码：`$ python -c "print 'hello world!'"`
 71 | 
 72 | * 解压zip包：
 73 |   * 创建zip包：`$ python -m zipfile -c tom.zip tom.txt`
 74 |   * 解压zip包：`$ python -m zipfile -e tom.zip .`
 75 |   * 查看zip包：`$ python -m zipfile -l tom.zip`
 76 | 
 77 | 
 78 | * 文件处理：
 79 |   ```python
 80 |   import shutil
 81 | 
 82 |   shutil.copy('C:\Users\Administrator\Desktop\ctools2.rar','q.rar')
 83 |   ```
 84 | 
 85 | 
 86 | ## 关于Python工作中的一些总结性技术
 87 | 
 88 | * [爬虫](https://github.com/tomoncle/PythonStudy/tree/master/crawlers/)
 89 | * [RPC](https://github.com/tomoncle/PythonStudy/tree/master/rpc/)
 90 | * [定时任务](https://github.com/tomoncle/PythonStudy/tree/master/scheduler_task/study_apscheduler/)
 91 | * [mysql](https://github.com/tomoncle/PythonStudy/tree/master/contributed_modules/mysql/)
 92 | * [mongodb](https://github.com/tomoncle/PythonStudy/tree/master/contributed_modules/mongodb/)
 93 | * [redis](https://github.com/tomoncle/PythonStudy/tree/master/contributed_modules/redis/)
 94 | * [数据分析](https://github.com/tomoncle/PythonStudy/tree/master/data_analysis/)：`matplotlib`, `mlab` , `numpy`, `tesseract`
 95 | * [页面解析技术](https://github.com/tomoncle/PythonStudy/tree/master/page_parser/): `bs4`, `xpath`
 96 | * [openstack开源模块](https://github.com/tomoncle/PythonStudy/tree/master/OpenStack/oslo_/)
 97 | * [Python 装饰器](https://github.com/tomoncle/PythonStudy/tree/master/decorator.md)
 98 | * [Python 多线程/多进程](https://github.com/tomoncle/PythonStudy/tree/master/standard_library/threads/)
 99 | * [Python 内置模块](https://github.com/tomoncle/PythonStudy/tree/master/standard_library/)
100 | * [Python 使用技巧](https://github.com/tomoncle/PythonStudy/tree/master/skills)
101 | 
102 | 


--------------------------------------------------------------------------------
/_config.yml:
--------------------------------------------------------------------------------
1 | theme: jekyll-theme-midnight


--------------------------------------------------------------------------------
/algorithms/algorithm_sorting.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # @Time           : 17-4-19 上午11:14
 4 | # @Author         : Tom.Lee
 5 | # @Description    : 
 6 | # @File           : algorithm_sorting.py
 7 | # @Product        : PyCharm
 8 | 
 9 | 
10 | def bubble_sort():
11 |     """
12 |     Bubble sort:
13 |     for n elements, run n-1 passes;
14 |     pass i makes n-i comparisons and moves the largest remaining value to the end
15 |     """
16 |     L = [9, 8, 7, 6, 5, 4, 3, 2, 1]
17 |     n = len(L)
18 |     for i in range(1, n):  # n-1 passes
19 |         # print n - i
20 |         for j in range(n - i):  # pass i: n-i comparisons, pushing the largest value to the right
21 |             if L[j] > L[j + 1]:
22 |                 L[j], L[j + 1] = L[j + 1], L[j]
23 | 
24 |     print L
25 | 
26 | 
27 | def insertion_sort():
28 |     """
29 |     Insertion sort:
30 |     sample sequence: [2,3,1,34,5,6,11,7,8]
31 | 
32 |     Indices start at 0:
33 |     step 1: compare index 1 against the slice [:1]
34 |     step 2: compare index 2 against the slice [:2]
35 |     ...
36 |     step n-1: compare index n-1 (by now the last element) against [:n-1]
37 |     n-1 steps in total
38 |     """
39 | 
40 |     L = [9, 8, 7, 5, 6, 4, 3, 2, 1]
41 |     n = len(L)
42 |     for i in range(n - 1):
43 |         for j in range(i + 1):  # indices start at 0, so on step i the element being placed sits at index i+1
44 |             # print i+1,'-',j
45 |             if L[i + 1] < L[j]: L[i + 1], L[j] = L[j], L[i + 1]
46 | 
47 |     print L
48 | 
49 | 
50 | def selection_sort():
51 |     """
52 |     Selection sort:
53 | 
54 |     each round finds the smallest element and puts it at the front, repeating over the sequence
55 | 
56 |     round 1: the smallest value goes to index 0
57 |     round 2: the smallest of the rest goes to index 1
58 |     ...
59 |     round n-1
60 |     """
61 |     L = [5, 4, 3, 2, 1, 0, -77]
62 |     n = len(L)
63 |     for i in range(n - 1):
64 |         for j in range(i + 1, n):
65 |             # print i,'-',j
66 |             if L[i] > L[j]: L[i], L[j] = L[j], L[i]
67 | 
68 |     print L
69 | 


--------------------------------------------------------------------------------
/algorithms/question1.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # @Time    : 2017/5/16 21:56
 4 | # @Author  : tom.lee
 5 | # @Site    : 
 6 | # @File    : question1.py
 7 | # @Software:
 8 | 
 9 | """
10 | 输入一个数组a和一个整数k，在由数组a中的数字任意组合而成的数里，找出大于或等于k的最小的一个
11 | 
12 | "这个算法有问题．有时间再改吧."
13 | """
14 | 
15 | a = [1, 3, 4, 5]  # digits that may be used to build the answer
16 | k = 1222  # lower bound the answer must reach
17 | 
18 | a.sort()  # sorted so that a[0] is the smallest digit and _min() meets candidates in ascending order
19 | kps = False
20 | ks = list(str(k))  # digits of k as one-character strings; deep() edits this list in place
21 | length = len(ks)
22 | 
23 | 
24 | def _min(lis, v):  # first element of lis that is >= v (the smallest such one when lis is sorted), else None
25 |     for n in lis:
26 |         if n < v:
27 |             continue
28 |         else:
29 |             return n
30 |     return None
31 | 
32 | 
33 | def deep(start, length, kps):  # rewrites the module-level list ks in place; kps is set once a digit outside a has been met
34 |     for i in range(start, length):
35 |         if kps:
36 |             ks[i] = str(a[0])  # once kps is set, the position is reset to a[0]
37 |         if int(ks[i]) in a:
38 |             continue
39 |         else:
40 |             m = _min(a, int(ks[i]))  # first digit in a that is >= the current digit, or None
41 |             kps = True
42 |             if not m:  # NOTE(review): also true when m == 0, not only when m is None
43 |                 ks[i - 1] = str(a[a.index(int(ks[i - 1]) + 1)])  # NOTE(review): a.index(x + 1) searches for the VALUE x+1 and raises ValueError if absent; a.index(x) + 1 may have been intended - confirm
44 | 
45 |                 deep(i, length, kps)
46 |             else:
47 |                 ks[i] = str(m)
48 | 
49 | 
50 | deep(0, length, kps)
51 | print ''.join(ks)  # ks has been modified in place by deep()
52 | 


--------------------------------------------------------------------------------
/basic_grammar.md:
--------------------------------------------------------------------------------
 1 | # 基础语法
 2 | python思想中一切皆对象，但是由于python是一门应用非常广泛的语言，所以在某些方面，你可以把python称为面向对象的语言，但它也可以是一门面向过程的语言：
 3 | **程序 = 数据结构 + 算法**
 4 | * 面向过程：以指令为中心，由指令处理数据，如何组织代码解决问题，如：shell运维
 5 | * 面向对象：以数据为中心，所有的处理代码都围绕数据展开，如何设计数据结构组织数据，
 6 |             并提供对此类数据所允许处理操作，如：web开发
 7 | 
 8 | 
 9 | # 数据结构：
10 | * 1.Python最基本的数据结构是序列,　序列是以非负整数为索引的有序对象的集合
11 | * 2.Python中内建的6种数据序列：列表，元组，字符串，Unicode字符串，buffer对象和xrange对象
12 | * 3.None表示Python的空值，对于Python而言，不管 **"",None,[],{},set(),()** 在if判断语句中都为False
13 | 
14 | ### 基本数据类型
15 | * Integral 类型:
16 |   * 整型：不可变类型
17 |   * 布尔类型：True，False
18 | 
19 | * 浮点类型：
20 |   * 浮点数：3.1415
21 |   * 复数 ：3+6j
22 |   * 十进制数字：如 `decimal.Decimal('3.14')`
23 | * 字符串："abc"
24 | 
25 | ### Python中组合数据类型：
26 | 注：该类型其实都是对对象的引用
27 | * 元组：tuple()，不可变对象
28 | * 列表：list[]，可变对象
29 | * 字典：dict{}，可变对象
30 | * 集合：set([])，可变对象
31 | 
32 | ### 容器，类型，对象相关概念：
33 | list,tuple,dict都是容器
34 | * 1.list,tuple,dict  可以跨行定义，最后一个元素后可以使用逗号，但元素为空时，不允许使用逗号
35 | * 2.所有对象都有引用计数，当引用计数为0时，垃圾回收器会回收此对象
36 | * 3.获取对象引用计数`import sys ; sys.getrefcount(object)`
37 | * 4.删除对象引用，del(object)，减少对象引用计数
38 | * 5.列表和字典都支持两种复制操作：浅复制和深复制
39 |   * 浅复制：如`list_1 = old_list[:]`或`copy.copy(old_list)`，创建一个新的容器对象，但其中的元素仍是对原对象元素的引用，修改其中的可变元素会影响之前对象（注意：`list_1 = old_list`只是赋值，两个变量引用同一个对象）
40 |   * 深复制：创建一个新对象，然后递归的复制一份，放置到新对象中，新对象不会影响之前对象
41 |            深复制可以使用copy模块的deepcopy()实现
42 | * 6.Python中的对象都是"第一类"，即使用标识符命名的所有对象都有相同的状态，于是能够命名所有对象，
43 |   都可以直接当数据进行处理，比如：`a = 1,b = 2,result = b/a`表示数值对象直接可以当作数据处理
44 | * 7.所有序列（概念：序列为索引为非负整数的有序对象的集合）都支持迭代（字符串也是一个序列）
45 | * 8.所有序列都支持的操作和方法：
46 |    * 索引 : `val[i]`
47 |    * 切片 : `val[i:j]`
48 |    * 扩展切片 : `val[i:j:stride]`
49 |    * 长度 : `len(val)`
50 |    * 最小值 : `min(val)`
51 |    * 最大值 : `max(val)`
52 |    * 数值序列求和 : `sum(val)`
53 |    * 都为True : `all(val)`
54 |    * 任意为True : `any(val)`
55 |    * 连接 : `val+val2`
56 |    * 重复 : `val * N`
57 |    * 存在 : `item in container`
58 |    * 不存在 : `item not in container`
59 | 
60 | ### 对象引用（变量）：
61 | * Python中所有的数据存为内存对象:
62 |   * Python中，"变量"实际是"指向内存对象的引用"
63 | 
64 | * 动态类型：
65 |   * 在任何时刻，只要需要，某个对象引用都可以重新引用一个不同的对象，可以是不同的数据类型
66 |   * 内置函数type()用于返回给定对象的数据类型
67 |   * 内置函数id()查看变量引用对象在内存中的地址
68 |   * "="用于将变量名与内存中的对象绑定，如果对象存在，就直接指向该对象，如果不存在，用"="创建引用的对象
69 | 
70 | **例如**：`name="tom"`,这个操作Python会在内存中声明一个name变量，开辟一块内存空间存储`tom`对象,然后`name`指向
71 | `tom`这个字符串对象，当`tom`这个字符串的引用计数为0时，Python会在合适的机会进行垃圾回收
72 | 
73 | # 命名规范：
74 | * 只能包含字母，数字，下划线，不能以数字开头
75 | * 区分字母大小写
76 | * 禁止使用关键字
77 | 
78 | * 以单一下划线开头的命名的变量（\_name）不会被from module import * 语句导入
79 | * 前后有两个下划线命名的变量（\_\_str__）是系统定义的变量名，对Python解释器有特殊含义
80 | * 以两个下划线开头但结尾没有的变量（\_\_numbers）是类的本地变量
81 | * 在交互模式下，"\_" 用于保存最后表达式的结果
82 | 
83 | **注意：变量名没有类型，对象才有**
84 | 
85 | # 帮助文档：
86 | * 查看对象方法：`dir(obj)`
87 | * 查看对象函数使用：`help(obj.method)`
88 | 
89 | 
90 | 
91 | 


--------------------------------------------------------------------------------
/class1_preliminary.md:
--------------------------------------------------------------------------------
  1 | # python 的类初步 
  2 | 
  3 | # 基础语法
  4 | ### 定义
  5 | 关键字：class
  6 | 格式： class ClassName(): pass
  7 | 
  8 | ### 初始化
  9 | 使用`__init__(self)`来进行初始化操作,`self`是python类中方法中必须存在的参数，表示当前对象，
 10 | 使用该参数传递对象，类似java的this, 当然`self`只是一个别名，你可以随便指定任意名称，
 11 | 如`abc`,但是不建议这样做，可读性差
 12 | 
 13 | ```python
 14 | # -*- coding=utf-8 -*-
 15 | # 定义
 16 | class Person():
 17 |     def __init__(self):
 18 |         print '初始化 ...'
 19 | 
 20 | # 引用
 21 | p=Person()
 22 | 
 23 | print p
 24 | 
 25 | """结果：
 26 | >>初始化 ...
 27 | >><__main__.Person instance at 0x7f0ac74f1c68>
 28 | """
 29 | ```
 30 | ### 初始化并且传递参数
 31 | 使用`__init__(self,param)`来进行初始化操作
 32 | 
 33 | ```python
 34 | # -*- coding=utf-8 -*-
 35 | # 定义
 36 | class Person():
 37 |     def __init__(self, params):
 38 |         self.params = params
 39 |         print '初始化 ...'
 40 | 
 41 | 
 42 | # 引用
 43 | p = Person("hello python")
 44 | 
 45 | print p.params
 46 | 
 47 | """结果：
 48 | >>初始化 ...
 49 | >>hello python
 50 | """
 51 | ```
 52 | 
 53 | ### 传递任意参数
 54 | ```python
 55 | # -*- coding=utf-8 -*-
 56 | # 定义
 57 | class Person():
 58 |     def __init__(self, *args,**kwargs):
 59 |         print '传入参数：',kwargs,args
 60 |         for k, v in kwargs.iteritems():  # 使用setattr()方法添加属性
 61 |             setattr(self, k, v)
 62 | 
 63 |         print '初始化 ...'
 64 | 
 65 | 
 66 | # 引用
 67 | 
 68 | p = Person(**{'k':1,'kk':2})
 69 | 
 70 | print p.__dict__  # 打印对象属性字典
 71 | 
 72 | ```
 73 | 
 74 | 
 75 | 
 76 | # 对象操作之动态属性
 77 | Python 实例对象可以动态地添加或删除属性
 78 | ### 添加
 79 | * `setattr(object,'field_name',value)` : 添加或修改object对象的属性field_name,值为value
 80 | * `object.field_name = value` : 直接使用`对象.属性=值` 为对象添加或修改属性
 81 | ### 获取
 82 | * `getattr(object,'field_name',default_value)`：获取object对象的field_name属性，假如该属性不存在，返回default_value默认值
 83 | * `object.field_name` : 直接使用`对象.属性` 获取对象的属性值，如果属性不存在，抛出异常
 84 | ### 删除
 85 | * `delattr(object,'field_name')` : 删除object对象的field_name属性，如果属性不存在，抛出异常
 86 | * `del object.field_name` : 使用`del 对象.属性` 删除object对象的field_name属性，如果属性不存在，抛出异常
 87 | ### 判断是否存在
 88 | * `hasattr(object, 'field_name')`: 判断object对象是否存在field_name属性
 89 | 
 90 | 
 91 | # 属性和方法
 92 | 在类里“私有属性”使用双下划线（\_\_）开头，“私有方法”也是使用双下划线（\_\_）开头，“受保护属性和受保护方法”使用单下划线（\_）开头，
 93 | “公开属性和方法”使用字母开头：
 94 | ```python
 95 | class A(object):
 96 |     def __init__(self):
 97 |         self.__name = "tom"  # private 私有属性,不能被外部调用, (实际是可以用 ._{className}__{filedName}来访问)
 98 |         self._id = 10  # protected 受保护的属性,可以被外部调用，但不建议
 99 |         self.age = 20  # public 公开的属性,完全被外部访问
100 | 
101 |     def __say_hi(self):
102 |         """
103 |         私有方法,不能被外部调用,(实际是可以用 ._{className}__{methodName}来访问)
104 |         :return:
105 |         """
106 |         print 'private method : say hi %s' % self.__name
107 | 
108 |     def _hello(self):
109 |         """
110 |         受保护的方法,可以被外部调用,但不建议
111 |         :return:
112 |         """
113 |         print 'protected method: hello %s' % self._id
114 | 
115 |     def hello_world(self):
116 |         """
117 |         公开的方法
118 |         :return:
119 |         """
120 |         print "public method : hello world %s" % self.age
121 | 
122 | ```
123 | 


--------------------------------------------------------------------------------
/class2_annotation.md:
--------------------------------------------------------------------------------
  1 | # Python 类注解
  2 | 
  3 | ```python
  4 | 
  5 | class Person(object):
  6 |     # 类属性，所有实例只存在一份类属性，共享该属性．
  7 |     class_attr = None
  8 | 
  9 |     def __init__(self, id, name):
 10 |         """
 11 |         实例方法，第一个参数必须为 self
 12 |         :param id: 实例属性
 13 |         :param name: 实例属性
 14 |         """
 15 |         self.id = id
 16 |         self.name = name
 17 |         self.__private_filed = 'private filed'  # 使用"__"开头的属性,为私有属性,外部无法访问
 18 | 
 19 |     @classmethod
 20 |     def class_method(cls):
 21 |         """
 22 |         类方法, 使用 @classmethod 注解来标注,参数为 cls
 23 |         可以通过cls.属性或方法名调用
 24 |         """
 25 |         return ' this is class method'
 26 | 
 27 |     @staticmethod
 28 |     def static_method():
 29 |         """
 30 |         static方法, 使用@staticmethod 注解来标注
 31 |         只能通过Person.属性名或方法名，一个全局函数
 32 |         """
 33 |         return ' this is static method'
 34 | 
 35 |     def get_private_filed(self):
 36 |         """
 37 |         可以定义方法来提供外部访问该属性，而不知道内部结构
 38 |         :return:__private_filed
 39 |         """
 40 |         return self.__private_filed
 41 | 
 42 | ```
 43 | 
 44 | # Python类深入
 45 | ### 访问限制
 46 |         python中访问限制,如果一个属性由双下划线开头(__)，该属性就无法被外部访问.
 47 |         但是，如果一个属性以"__xxx__"的形式定义，那它又可以被外部访问了，
 48 |         以"__xxx__"定义的属性在Python的类中被称为特殊属性，有很多预定义的特殊属性可以使用，
 49 |         通常我们不要把普通属性用"__xxx__"定义。
 50 |         以单下划线开头的属性"_xxx"虽然也可以被外部访问，但是，按照习惯，他们不应该被外部访问。
 51 | ### 属性
 52 |         绑定在一个实例上的属性不会影响其他实例，但是，类本身也是一个对象，如果在类上绑定一个属性，
 53 |         则所有实例都可以访问类的属性，并且，所有实例访问的类属性都是同一个！
 54 |         也就是说，实例属性每个实例各自拥有，互相独立，而类属性有且只有一份。
 55 |         当实例属性和类属性重名时，实例属性优先级高，它将屏蔽掉对类属性的访问。但不会影响其他实例对象
 56 |         当类属性变为(__)私有时，外部依然无法访问
 57 | ### 函数方法
 58 |         我们在 class 中定义的实例方法其实也是属性，它实际上是一个函数对象：
 59 |         举例：p1.get_grade 返回的是一个函数对象但这个函数是一个绑定到实例的函数，
 60 |         　　　p1.get_grade() 才是方法调用
 61 |         因为方法也是一个属性，所以，它也可以动态地添加到实例上，只是需要用 types.MethodType()
 62 |         把一个函数变为一个方法：
 63 |         代码：
 64 |                 import types
 65 |                 def fn_get_grade(self):
 66 |                     if self.score >= 80:
 67 |                         return 'A'
 68 |                     if self.score >= 60:
 69 |                         return 'B'
 70 |                     return 'C'
 71 |                 class Person(object):
 72 |                     def __init__(self, name, score):
 73 |                         self.name = name
 74 |                         self.score = score
 75 |                 p1 = Person('Bob', 90)
 76 |                 p1.get_grade = types.MethodType(fn_get_grade, p1, Person)
 77 |                 print p1.get_grade()
 78 |         end:
 79 | ### 类方法
 80 |         通过标记一个 @classmethod，该方法将绑定到类上，而非类的实例。
 81 |         类方法的第一个参数将传入类本身，通常将参数名命名为 cls
 82 |         因为是在类上调用，而非实例上调用，因此类方法无法获得任何实例变量，只能获得类的引用。
 83 | ### 继承
 84 |         class Person(object):
 85 |             def __init__(self, name, gender):
 86 |                 self.name = name
 87 |                 self.gender = gender
 88 |         class Student(Person):
 89 |              #定义Student类时，只需要把额外的属性加上，例如score：
 90 |              def __init__(self, name, gender, score):
 91 |                 super(Student, self).__init__(name, gender)
 92 |                 self.score = score
 93 |         一定要用 super(Student, self).__init__(name, gender) 去初始化父类，否则，
 94 |         继承自 Person 的 Student 将没有 name 和 gender。
 95 |         函数super(Student, self)将返回当前类继承的父类，即 Person ，
 96 |         然后调用__init__()方法，注意self参数已在super()中传入，在__init__()中将隐式传递，
 97 |         不需要写出（也不能写）。
 98 | ### python中判断类型
 99 |         函数isinstance()可以判断一个变量的类型，既可以用在Python内置的数据类型
100 |         如str、list、dict，也可以用在我们自定义的类，它们本质上都是数据类型。
101 |         >>>p = Person('zhangsan','male')
102 |         >>>isinstance(p, Person)
103 |         True
104 | ### python中多态
105 |         类具有继承关系，并且子类类型可以向上转型看做父类类型
106 |         子类重写父类的方法，调用时首先调用子类的方法实现
107 | ### python中多重继承
108 |         class A(B,C):
109 |             pass
110 |         多重继承的目的是从两种继承树中分别选择并继承出子类，以便组合功能使用。
111 | ### python中获取对象信息
112 |         type() 函数获取变量的类型
113 |         dir() 函数获取变量的所有属性
114 |         setattr(s, 'name', 'Adam')  # 设置新的name属性
115 |         getattr(s, 'age', 20)  # 获取age属性，如果属性不存在，就返回默认值20
116 | # python的特殊方法
117 | ### \_\_str__()和 \_\_repr__()
118 |         如果要把一个类的实例变成 str，就需要实现特殊方法__str__()：
119 |         代码：
120 |             def __str__(self):
121 |                 return '(Person: %s)' % self.name
122 |             __repr__ = __str__  #偷懒的定义__repr__函数
123 |         end:
124 |         因为 Python 定义了__str__()和__repr__()两种方法，
125 |         __str__()用于显示给用户，而__repr__()用于显示给开发人员。
126 | ### \_\_cmp__()
127 |         对 int、str 等内置数据类型排序时，Python的 sorted() 按照默认的比较函数 cmp 排序，
128 |         但是，如果对一组 Student 类的实例排序时，就必须提供我们自己的特殊方法 __cmp__()
129 |         代码：
130 |             def __cmp__(self, s):
131 |                 if self.name < s.name:
132 |                     return -1
133 |                 elif self.name > s.name:
134 |                     return 1
135 |                 else:
136 |                     return 0
137 |         end:
138 |         使用：print sorted(person_list)
139 |         以分数排序：
140 |             def __cmp__(self, s):
141 |                 if self.score == s.score:
142 |                     return cmp(self.name, s.name)
143 |                 return -cmp(self.score, s.score)
144 | ### \_\_len__()
145 |         如果一个类表现得像一个list，要获取有多少个元素，就得用 len() 函数。
146 |         要让 len() 函数工作正常，类必须提供一个特殊方法__len__()，它返回元素的个数。
147 | ### \_\_slots__
148 |         如果要限制添加的属性，例如，Student类只允许添加 name、gender和score 这3个属性，
149 |         就可以利用Python的一个特殊的__slots__来实现。
150 |         __slots__的目的是限制当前类所能拥有的属性，如果不需要添加任意动态的属性，
151 |         使用__slots__也能节省内存
152 |         代码：
153 |             class Student(object):
154 |                 __slots__ = ('name', 'gender', 'score')
155 |                 pass
156 |         end:
157 | ### \_\_call__
158 |         所有的函数都是可调用对象。
159 |         一个类实例也可以变成一个可调用对象，只需要实现一个特殊方法__call__()。
160 |         class A(object):
161 |             def __call__(self,s):
162 |                 return 'hello %s'%s
163 |         >>>a=A()
164 |         >>>print a('jack')
165 |         hello jack
166 | ### python中 @property
167 |         @property表示可以将方法当作属性来使用
168 |         第一个score(self)是get方法，用@property装饰，
169 |         第二个score(self, score)是set方法，用@score.setter装饰
170 |         　　　　
171 |         代码：
172 |             @property
173 |             def score(self):
174 |                 return self.__score
175 |             @score.setter
176 |             def score(self, score):
177 |                 if score < 0 or score > 100:
178 |                     raise ValueError('invalid score')
179 |                 self.__score = score
180 |         end:
181 |         使用：
182 |             obj.score = 90 #调用set方法
183 |             print obj.score  #调用get方法
184 | 
185 | # 练习
186 | ```python
187 | def fib(num):
188 |     """
189 |     斐波那契数列
190 |     """
191 |     a, b, L = 0, 1, []
192 |     for n in range(num):
193 |         L.append(a)
194 |         a, b = b, a + b
195 |     return L
196 | 
197 | 
198 | def gcd(a, b):
199 |     if b == 0:
200 |         return a
201 |     return gcd(b, a % b)
202 | 
203 | 
204 | class Rational(object):
205 |     """
206 |     分数计算
207 |     """
208 | 
209 |     def __init__(self, p, q):
210 |         self.p = p
211 |         self.q = q
212 | 
213 |     def __add__(self, r):
214 |         return Rational(self.p * r.q + self.q * r.p, self.q * r.q)
215 | 
216 |     def __sub__(self, r):
217 |         return Rational(self.p * r.q - self.q * r.p, self.q * r.q)
218 | 
219 |     def __mul__(self, r):
220 |         return Rational(self.p * r.p, self.q * r.q)
221 | 
222 |     def __div__(self, r):
223 |         return Rational(self.p * r.q, self.q * r.p)
224 | 
225 |     def __str__(self):
226 |         g = gcd(self.p, self.q)
227 |         return '%s/%s' % (self.p / g, self.q / g)
228 | 
229 |     __repr__ = __str__
230 | 
231 | 
232 | r1 = Rational(1, 2)
233 | r2 = Rational(1, 4)
234 | print r1 + r2
235 | print r1 - r2
236 | print r1 * r2
237 | print r1 / r2
238 | ```
239 | 


--------------------------------------------------------------------------------
/class3_inherit.md:
--------------------------------------------------------------------------------
 1 | # 类继承
 2 | 语法：
 3 | ```python
 4 | class ClassName(父类):
 5 |     def __init__(self [,父类属性] [,子类属性]):
 6 |         # 重写init方法
 7 |         super(ClassName, self).__init__([父类属性])
 8 |         self.子类属性 = 子类属性
 9 | ```
10 | 
11 | 举例：如下代码，函数`super(Student, self)`将返回当前类继承的父类，即 `Person` ，
12 | 然后调用`__init__()`方法，注意`self`参数已在`super()`中传入，在`__init__()`中将隐式传递，不需要写出（也不能写）
13 | ```python
14 | class Person(object):
15 |     def __init__(self, name, sex):
16 |         self.name = name
17 |         self.sex = sex
18 | 
19 |     def say_hello(self):
20 |         return 'hello python'
21 | 
22 |     def show_me(self):
23 |         return 'my name is %s , sex is %s' % (self.name, self.sex)
24 | 
25 | 
26 | class Student(Person):
27 |     # 定义Student类时，只需要把额外的属性加上，例如score：
28 |     def __init__(self, name, sex, score):
29 |         super(Student, self).__init__(name, sex)
30 |         self.score = score
31 | 
32 |     def student(self):
33 |         """子类方法"""
34 |         return 'i am a student, my name is %s' % self.name
35 | 
36 |     def show_me(self):
37 |         """重写父类方法"""
38 |         return 'my name is %s , sex is %s , my final score is %d' % (self.name, self.sex, self.score)
39 | 
40 | 
41 | stu = Student("aric", 'man', 20)
42 | print stu.__dict__  # 查看子类属性字典 >>{'score': 20, 'name': 'aric', 'sex': 'man'}
43 | print stu.say_hello()  # 查看子类继承父类的方法 >>hello python
44 | print stu.student()  # 查看子类独有的方法 >>i am a student, my name is aric
45 | print stu.show_me()  # 查看子类重写的父类方法 >>my name is aric , sex is man , my final score is 20
46 | 
47 | ```
48 | 
49 | 
50 | 
51 | 
52 | 
53 | 


--------------------------------------------------------------------------------
/class4_thorough.md:
--------------------------------------------------------------------------------
 1 | # python 类深入
 2 | 
 3 | # 特殊方法
 4 | ### \_\_new__() 方法：
 5 | 在 Python 内部，真正创建对象的是 \_\_new__() 方法，它在 \_\_init__() 方法之前被调用；它是一个以类（cls）为第一个参数的静态方法，在创建对象时调用。
 6 | 而\_\_init__()方法是在创建完对象后调用，对当前对象的实例做一些初始化，无返回值。
 7 | 如果重写了\_\_new__()而\_\_new__()没有返回当前类的实例，那么\_\_init__()将不会被自动调用。
 8 | 
 9 | ###### 使用
10 | * 使用\_\_new__()方法设计单例模式
11 | ```python
12 | import threading
13 | lock = threading.Lock()
14 | 
15 | 
16 | class Singleton(object):
17 |     __instance = None
18 | 
19 |     def __init__(self):
20 |         pass
21 | 
22 |     def __new__(cls, *args):
23 |         if not Singleton.__instance:
24 |             # set lock keep thread safe
25 |             try:
26 |                 lock.acquire()
27 |                 if not Singleton.__instance:
28 |                     Singleton.__instance = object.__new__(cls, *args)
29 |             except Exception, e:
30 |                 print 'Singleton: init error : %s' % e
31 |             finally:
32 |                 lock.release()
33 |         return Singleton.__instance
34 | 
35 | 
36 | ### TEST
37 | s1 = Singleton()
38 | s2 = Singleton()
39 | s1.dicts = {'name': 'tom'}
40 | 
41 | print id(s2) == id(s1), s2.dicts
42 | ```
43 | ### \_\_setattr__() 方法：
44 | Python 可以动态给对象添加属性，禁止添加属性需要重写该方法
45 | ```python
46 |  def __setattr__(self, key, value):
47 |         pass
48 | ```
49 | 
50 | ### \_\_dict__() 方法：
51 | 使用\_\_dict__()方法用于返回对象的属性字典，python重写\_\_setattr__()方法禁止对象添加属性，
52 | 但是可以通过 ` obj.__dict__['index']= 11 `添加属性，重写\_\_dict__()方法可以禁用此方法
53 | ```python
54 |   def __dict__(self):
55 |         pass
56 | ```
57 | ###### 构造全局字典
58 | ```python
59 | import threading
60 | 
61 | lock = threading.Lock()
62 | 
63 | 
64 | class ApplicationDICT(object):
65 |     __instance = None
66 |     __maps = {}
67 | 
68 |     def __new__(cls, *args):
69 |         if not ApplicationDICT.__instance:
70 |             # set lock keep thread safe
71 |             try:
72 |                 lock.acquire()
73 |                 if not ApplicationDICT.__instance:
74 |                     ApplicationDICT.__instance = object.__new__(cls, *args)
75 |             except Exception, e:
76 |                 print 'Singleton: init error : %s' % e
77 |             finally:
78 |                 lock.release()
79 |         return ApplicationDICT.__instance
80 | 
81 |     @property
82 |     def maps(self):
83 |         return self.__maps
84 | 
85 |     def set_maps(self, k, v):
86 |         assert k and v
87 |         self.__maps[k] = v
88 | 
89 |     def __setattr__(self, key, value):
90 |         pass
91 | 
92 |     def __dict__(self):
93 |         pass
94 | 
95 | ```
96 | 


--------------------------------------------------------------------------------
/contributed_modules/mongodb/mongodb_utils.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | # @Time           : 17-6-13 下午12:56
  4 | # @Author         : Tom.Lee
  5 | # @Docs           : http://www.cnblogs.com/hhh5460/p/5838516.html
  6 | # @File           : mongodb.py
  7 | # @Product        : PyCharm
  8 | import pymongo
  9 | 
 10 | 
 11 | class _Mongodb(object):
 12 |     def __init__(self,
 13 |                  host=None,
 14 |                  port=None,
 15 |                  document_class=dict,
 16 |                  tz_aware=None,
 17 |                  connect=None,
 18 |                  **kwargs):
 19 |         self.__mongodb = pymongo.MongoClient(
 20 |             host, port, document_class, tz_aware, connect, **kwargs)
 21 | 
 22 |     @property
 23 |     def mongodb_client(self):
 24 |         return self.__mongodb
 25 | 
 26 | 
 27 | class MongodbUtils(object):
 28 |     def __init__(self,
 29 |                  host=None,
 30 |                  port=None,
 31 |                  document_class=dict,
 32 |                  tz_aware=None,
 33 |                  connect=None,
 34 |                  **kwargs):
 35 |         self.__mongodb_client = _Mongodb(
 36 |             host=host,
 37 |             port=port,
 38 |             document_class=document_class,
 39 |             tz_aware=tz_aware,
 40 |             connect=connect,
 41 |             **kwargs).mongodb_client
 42 |         self.__database = None
 43 |         self.__collection = None
 44 | 
 45 |     @property
 46 |     def mongodb_client(self):
 47 |         return self.__mongodb_client
 48 | 
 49 |     @property
 50 |     def mongodb_database(self):
 51 |         assert self.__database
 52 |         return self.__database
 53 | 
 54 |     @property
 55 |     def mongodb_collection(self):
 56 |         assert self.__collection
 57 |         return self.__collection
 58 | 
 59 |     def use_db(self, db):
 60 |         """
 61 |         切换数据库 > use tom_db
 62 | 
 63 |         :param db:
 64 |         :return:
 65 |         """
 66 |         self.__database = self.db_create_or_get(db)
 67 |         return self
 68 | 
 69 |     def use_collection(self, collection, db=None):
 70 |         """
 71 |         使用表操作 > db.tom_table
 72 | 
 73 |         :param collection:
 74 |         :param db:
 75 |         :return:
 76 |         """
 77 |         if db:
 78 |             self.__database = self.db_create_or_get(db)
 79 |         self.__collection = self.mongodb_database[collection]
 80 |         return self
 81 | 
 82 |     def db_list(self):
 83 |         """
 84 |         数据库列表 show dbs
 85 | 
 86 |         :return: ['db1','db2']
 87 |         """
 88 |         return self.mongodb_client.database_names()
 89 | 
 90 |     def db_exists(self, db_name):
 91 |         """
 92 |         :param db_name:
 93 |         :return: True/False
 94 |         """
 95 |         return db_name in self.db_list()
 96 | 
 97 |     def db_create_or_get(self, db_name):
 98 |         """
 99 |         创建或使用
100 |         > use tom_db
101 |         > db.createCollection('table1') # 第二步开始创建数据库
102 | 
103 |         :param db_name:
104 |         :return: __mongodb.get_database(db_name)
105 |         """
106 |         # self.mongodb_client.get_database(db_name)
107 | 
108 |         return self.mongodb_client[db_name]
109 | 
110 |     def db_delete(self, db_name):
111 |         """
112 |         删除
113 |         > use tom_db
114 |         > db.dropDatabase()
115 | 
116 |         :param db_name:
117 |         :return:
118 |             error  :  {u'code': 26, u'ok': 0.0, u'errmsg': u'ns not found'}
119 |             success:  {u'ns': u'tom_db.tom_table', u'ok': 1.0, u'nIndexesWas': 1}
120 |         """
121 |         return self.mongodb_client.drop_database(db_name)
122 | 
123 |     def collection_list(self):
124 |         """
125 |         表(文档)列表 > show tables
126 |         :return:
127 |         """
128 |         return self.mongodb_database.collection_names()
129 | 
130 |     def collection_create_or_get(self, collection_name):
131 |         """
132 |         创建或获取表 createCollection('table1')
133 | 
134 |         :param collection_name:
135 |         :return:
136 |         """
137 |         return self.mongodb_database[collection_name]
138 | 
139 |     def collection_exists(self, collection_name):
140 |         """
141 |         集合是否存在
142 |         :param collection_name:
143 |         :return:
144 |         """
145 |         return collection_name in self.collection_list()
146 | 
147 |     def collection_delete(self, collection_name):
148 |         """
149 |         删除集合 db.tom_table2.drop()
150 | 
151 |         :param collection_name:
152 |         :return:
153 |         """
154 |         return self.mongodb_database.drop_collection(collection_name)
155 | 
156 |     def document_count(self, filter_=None):
157 |         """
158 |         db.tom_table.count()
159 | 
160 |         :param filter_:{'name':'zs'}
161 |         :return:
162 |         """
163 |         return self.mongodb_collection.count(filter=filter_)
164 | 
165 |     def document_find(self, *args, **kwargs):
166 |         """
167 |         db.tom_table.find({'seq':'_seq_7'})
168 | 
169 |         :param args:
170 |         :param kwargs:{'seq':'_seq_7'}
171 |         :return:
172 |         """
173 |         return self.mongodb_collection.find(*args, **kwargs)
174 | 
175 |     def document_insert(self, dict_item):
176 |         """
177 |         db.tom_table.insert({'name':'jack'})
178 | 
179 |         :param dict_item: {'name':'jack'}
180 |         :return:
181 |         """
182 |         return self.mongodb_collection.insert(dict_item)
183 | 
184 |     def document_drop(self):
185 |         """
186 |         删除全部文档
187 |         :return:
188 |         """
189 |         return self.mongodb_collection.drop()
190 | 
191 |     def document_delete(self, filter_, collation=None):
192 |         """
193 |         db.tom_table.deleteOne({'seq':'_seq_7'})
194 | 
195 |         :param filter_: {'name':'jack'}
196 |         :param collation:
197 |         :return:
198 |         """
199 |         result = self.mongodb_collection.delete_one(filter_, collation)
200 |         return result.delete_count > 0
201 | 
202 |     def document_delete_list(self, filter_, collation=None):
203 |         """
204 |         db.tom_table.deleteMany({'seq':'_seq_7'})
205 | 
206 |         :param filter_: {'seq':'_seq_7'}
207 |         :param collation:
208 |         :return:
209 |         """
210 |         self.mongodb_collection.delete_many(filter_, collation)
211 |         return self.document_count(filter_) == 0
212 | 


--------------------------------------------------------------------------------
/contributed_modules/mysql/mysqldb_/__init__.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # @Time           : 17-8-30 下午3:49
 4 | # @Author         : Tom.Lee
 5 | # @CopyRight      : 2016-2017 OpenBridge by yihecloud
 6 | # @File           : __init__.py.py
 7 | # @Product        : PyCharm
 8 | # @Docs           : 
 9 | # @Source         : 
10 | 
11 | 
12 | 


--------------------------------------------------------------------------------
/contributed_modules/mysql/mysqldb_/mysql_lock.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | # @Time    : 2017/8/25 22:42
  4 | # @Author  : Tom.lee
  5 | # @Site    : 
  6 | # @File    : mysql_lock.py
  7 | # @Software: PyCharm
  8 | 
  9 | 
 10 | """
 11 | 通过MySQL实现分布式锁服务
 12 | """
 13 | import MySQLdb
 14 | import logging
 15 | import time
 16 | 
 17 | FORMAT_STR = '%(asctime)s -%(module)s:%(filename)s-L%(lineno)d-%(levelname)s: %(message)s'
 18 | logger = logging.getLogger()
 19 | logger.setLevel(logging.DEBUG)
 20 | formatter = logging.Formatter(FORMAT_STR)
 21 | handler = logging.StreamHandler()
 22 | handler.setFormatter(formatter)
 23 | logger.addHandler(handler)
 24 | logging.info("Current log level is : %s", logging.getLevelName(logger.getEffectiveLevel()))
 25 | 
 26 | 
 27 | class MySqlLock(object):
 28 |     LOCK_SQL = "SELECT get_lock('{key}', {timeout}) FROM dual"
 29 |     UNLOCK_SQL = "SELECT release_lock('{key}') FROM dual"
 30 | 
 31 |     def __init__(self, lock_key=None, *args, **kwargs):
 32 |         """
 33 |         :param lock_key:
 34 |         :param args:    参数与MySQLdb初始化参数一致.
 35 |         :param kwargs:  参数与MySQLdb初始化参数一致.
 36 |                     host='localhost'
 37 |                     user='test'
 38 |                     passwd='test'
 39 |                     db='test'
 40 |         """
 41 |         self.__db = MySQLdb.connect(*args, **kwargs)
 42 |         self.lock_key = lock_key or '7ab18906739e4662ac01e69f5ebb7352'
 43 | 
 44 |     def _execute(self, sql):
 45 |         """
 46 |         MySQL数据库操作
 47 |         :param sql:
 48 |         :return: (1L,) --> tuple
 49 |         """
 50 |         res = (-1,)
 51 |         cursor = self.__db.cursor()
 52 |         try:
 53 |             cursor.execute(sql)
 54 |             if cursor.rowcount != 1:
 55 |                 logging.error("Multiple rows returned in mysql lock function.")
 56 |             else:
 57 |                 res = cursor.fetchone()
 58 |         except Exception, ex:
 59 |             logging.error("执行SQL\"%s\" 失败! 异常信息: %s", sql, str(ex))
 60 |         finally:
 61 |             cursor.close()
 62 |         return res
 63 | 
 64 |     def lock(self, timeout):
 65 |         """
 66 |         MySQL数据库加锁
 67 |         :param timeout:  超时时间
 68 |         :return:
 69 |         """
 70 |         # 加锁操作
 71 |         lk = self._execute(self.LOCK_SQL.format(key=self.lock_key, timeout=timeout))
 72 | 
 73 |         if lk[0] == 0:
 74 |             logging.debug("锁'%s'已经被创建.", self.lock_key)
 75 |             return False
 76 |         elif lk[0] == 1:
 77 |             logging.debug("创建锁'%s'." % self.lock_key)
 78 |             return True
 79 |         else:
 80 |             logging.error("获取锁失败!")
 81 |             return None
 82 | 
 83 |     def unlock(self):
 84 |         """
 85 |         释放MySQL锁.
 86 |         :return:
 87 |         """
 88 |         # 释放操作
 89 |         uk = self._execute(self.UNLOCK_SQL.format(key=self.lock_key))
 90 | 
 91 |         if uk[0] == 0:
 92 |             logging.debug("释放锁'%s'失败(该锁被其他进程持有)" % self.lock_key)
 93 |             return False
 94 |         elif uk[0] == 1:
 95 |             logging.debug("释放锁'%s'." % self.lock_key)
 96 |             return True
 97 |         else:
 98 |             logging.error("锁'%s'不存在." % self.lock_key)
 99 |             return None
100 | 
101 | 
102 | if __name__ == "__main__":
103 |     l = MySqlLock(host='localhost', user='root', passwd='root', db='iaasms')
104 |     ret = l.lock(15)
105 |     if not ret:
106 |         logging.error("获取锁失败,退出!")
107 |         quit()
108 | 
109 |     time.sleep(15)  # 模拟跨进程的同步操作!
110 |     # raise Exception('模拟操作异常,mysql会自动释放该进程持有的锁.')
111 |     # TODO something
112 |     print 'hello ok!'
113 | 
114 |     l.unlock()
115 | 


--------------------------------------------------------------------------------
/contributed_modules/mysql/mysqldb_/study_mysqldb.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | # @Time           : 17-3-27 下午4:29
  4 | # @Author         : Tom.Lee
  5 | # @Description    : mysql 操作
  6 | # @File           : study_mysql.py
  7 | # @Product        : PyCharm
  8 | import MySQLdb
  9 | import logging
 10 | from contextlib import closing
 11 | 
 12 | """
 13 | # # １．创建数据库的连接
 14 | # conn = MySQLdb.connect(host='localhost', port=3306, user='root',
 15 | #                        passwd='root', db='iaasms_dev', )
 16 | #
 17 | # # ２．创建游标
 18 | # cur = conn.cursor(MySQLdb.cursors.DictCursor)
 19 | #
 20 | # # ３．通过游标cur 操作execute()方法可以写入纯sql语句对数据进行操作
 21 | # sql = '''
 22 | # SELECT a.name AS snapshot_name, b.name AS volume_name
 23 | # FROM snapshot a INNER JOIN volume b
 24 | # ON a.volume_id=b.volume_id
 25 | # '''
 26 | # count = cur.execute(sql)  # 返回总条数
 27 | # # result = cur.fetchmany(count)  # 返回指定条目的结果集
 28 | # result = cur.fetchall()
 29 | # # ４．关闭游标
 30 | # cur.close()
 31 | #
 32 | # # ５．提交事务，必须要有这个方法，否则数据不会被真正的插入。
 33 | # conn.commit()
 34 | #
 35 | # # ６．关闭连接
 36 | # conn.close()
 37 | #
 38 | # # ************打印***********
 39 | # print result
 40 | 
 41 | # 一次插入多条记录,,返回值为受影响的行数。
 42 | # sql="insert into student values(%s,%s,%s,%s)"
 43 | # cur.executemany(sql,[
 44 | #     ('3','Tom','1 year 1 class','6'),
 45 | #     ('3','Jack','2 year 1 class','7'),
 46 | #     ('3','rick','2 year 2 class','7'),
 47 | #     ])
 48 | 
 49 | # *******************close conn***************************
 50 | from contextlib import closing
 51 | import MySQLdb
 52 | 
 53 | ''' At the beginning you open a DB connection. Particular moment when
 54 |   you open connection depends from your approach:
 55 |   - it can be inside the same function where you work with cursors
 56 |   - in the class constructor
 57 |   - etc
 58 | '''
 59 | db = MySQLdb.connect("host", "user", "pass", "database")
 60 | with closing(db.cursor()) as cur:
 61 |     cur.execute("somestuff")
 62 |     results = cur.fetchall()
 63 |     # do stuff with results
 64 | 
 65 |     cur.execute("insert operation")
 66 |     # call commit if you do INSERT, UPDATE or DELETE operations
 67 |     db.commit()
 68 | 
 69 |     cur.execute("someotherstuff")
 70 |     results2 = cur.fetchone()
 71 |     # do stuff with results2
 72 | 
 73 | # at some point when you decided that you do not need
 74 | # the open connection anymore you close it
 75 | db.close()
 76 | 
 77 | """
 78 | 
 79 | # 创建名为MySQL的日志
 80 | logger = logging.getLogger('MySQL')
 81 | # 设置logger的level为DEBUG
 82 | logger.setLevel(logging.DEBUG)
 83 | # 创建一个输出日志到控制台的StreamHandler
 84 | handler = logging.StreamHandler()
 85 | formatter = logging.Formatter('[%(asctime)s] %(name)s:%(levelname)s: %(message)s')
 86 | handler.setFormatter(formatter)
 87 | # 给logger添加上handler
 88 | logger.addHandler(handler)
 89 | 
 90 | 
 91 | class _Closing(closing):
 92 |     def __exit__(self, *exc_info):
 93 |         if self.thing:
 94 |             self.thing.close()
 95 | 
 96 | 
 97 | class MySQLUtils(object):
 98 |     def __init__(self, *args, **kwargs):
 99 |         """
100 |         :param args:
101 |         :param kwargs:
102 |         """
103 |         for k, v in kwargs.iteritems():
104 |             setattr(self, k, v)
105 | 
106 |         self.__args = args
107 |         self.__kwargs = kwargs
108 |         self.__connection = None
109 |         self.__cursor = None
110 | 
111 |     def __enter__(self):
112 |         """
113 |         打开资源,支持with语法
114 |         :return: MySQLUtils instance
115 |         """
116 |         self.open()
117 |         return self
118 | 
119 |     def __exit__(self, exc_type, exc_val, exc_tb):
120 |         """
121 |         关闭资源,支持with语法
122 |         :param exc_type:
123 |         :param exc_val:
124 |         :param exc_tb:
125 |         :return:
126 |         """
127 |         self.close()
128 |         if exc_tb:
129 |             # 默认返回None,　返回None或False 发生异常交由外部调用程序捕获（建议）
130 |             # 如果返回True,则由该函数内部处理，外部调用会继续执行
131 |             logger.error('[%s]%s' % (exc_type, exc_val))
132 | 
133 |     def open(self):
134 |         """
135 |         打开连接
136 |         :return:
137 |         """
138 |         if self.__connection:
139 |             raise MySQLdb.MySQLError("connection already connected.")
140 |         self.__connection = MySQLdb.connect(*self.__args, **self.__kwargs)
141 |         if self.__cursor:
142 |             raise MySQLdb.MySQLError("cursor already opened.")
143 |         self.__cursor = self.__connection.cursor(MySQLdb.cursors.DictCursor)
144 |         logger.info("connection opened.")
145 | 
146 |     def close(self):
147 |         """
148 |         关闭连接
149 |         :return:
150 |         """
151 |         with _Closing(self.__cursor) as _:
152 |             pass
153 |         with _Closing(self.__connection) as _:
154 |             pass
155 |         self.__cursor = None
156 |         self.__connection = None
157 | 
158 |         logger.info("connection close success.")
159 | 
160 |     def __execute(self, sql, commit=False):
161 |         """
162 |         执行SQL
163 |         :param sql:
164 |         :param commit:
165 |         :return:tuple result or row numbers
166 |         """
167 |         if not (self.__connection and self.__cursor):
168 |             raise MySQLdb.MySQLError("connection already closed.")
169 |         count = self.__cursor.execute(sql)  # 返回总条数
170 |         result = self.__cursor.fetchall()
171 |         self.__connection.commit() if commit else None
172 |         return count if commit else result
173 | 
174 |     def select(self, sql, formatter_func=None):
175 |         """
176 |         查询函数
177 |         :param sql:
178 |         :param formatter_func:格式化函数
179 |         :return:
180 |         """
181 |         if formatter_func:
182 |             return map(formatter_func, self.__execute(sql))
183 |         return self.__execute(sql)
184 | 
185 |     def save_or_update(self, sql):
186 |         """
187 |         编辑或修改
188 |         :param sql:
189 |         :return:row numbers
190 |         """
191 |         return self.__execute(sql, True)
192 | 
193 |     def delete(self, sql):
194 |         """
195 |         删除资源
196 |         :param sql:
197 |         :return: row numbers
198 |         """
199 |         return self.__execute(sql, True)
200 | 
201 | 
if __name__ == '__main__':
    # Manual demo against a local database; not executed on import.
    mu = MySQLUtils(host='localhost', port=3306, user='root',
                    passwd='root', db='iaasms_dev')
    import datetime


    def formatter_datetime(dic):
        # Row formatter: replace every datetime value in the row dict with str(value).
        for k, v in dic.iteritems():
            if isinstance(v, datetime.datetime):
                dic[k] = str(v)
        return dic


    # 1. Explicit try/finally usage (kept for reference)
    # try:
    #     mu.open()
    #     # raise Exception('error')
    #     print mu.select('SELECT * FROM flavor', formatter_datetime)
    #     print mu.delete('DELETE FROM flavor WHERE id=42')
    # finally:
    #     mu.close()

    # 2. Context-manager usage
    with mu as mu:
        # NOTE(review): close() resets the connection and cursor, and __execute
        # raises when either is missing, so the queries below look like they
        # will fail -- confirm this is an intentional error-path demo.
        mu.close()
        # raise Exception('error')
        print mu.select('SELECT * FROM flavor', formatter_datetime)
        print mu.delete('DELETE FROM flavor WHERE id=42')

    # Attribute access on the helper; 'no' falls back to the getattr default None.
    print getattr(mu, 'host'), getattr(mu, 'port'), getattr(mu, 'no', None)
232 | 
233 | 


--------------------------------------------------------------------------------
/contributed_modules/mysql/sqlalchemy_/__init__.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # @Time           : 17-8-30 下午4:08
 4 | # @Author         : Tom.Lee
 5 | # @CopyRight      : 2016-2017 OpenBridge by yihecloud
 6 | # @File           : __init__.py.py
 7 | # @Product        : PyCharm
 8 | # @Docs           : 
 9 | # @Source         : 
10 | 
11 | 
12 | 


--------------------------------------------------------------------------------
/contributed_modules/mysql/sqlalchemy_/mysql_lock.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | # @Time           : 17-8-26 下午3:54
  4 | # @Author         : Tom.Lee
  5 | # @File           : mysql_lock2.py
  6 | # @Product        : PyCharm
  7 | # @Docs           : 
  8 | # @Source         : 
  9 | 
 10 | """
 11 | 通过MySQL sqlalchemy 实现分布式锁服务
 12 | """
 13 | import logging
 14 | import time
 15 | from sqlalchemy import create_engine
 16 | 
# Log line layout: timestamp, module, file name, line number, level, message.
FORMAT_STR = '%(asctime)s -%(module)s:%(filename)s-L%(lineno)d-%(levelname)s: %(message)s'
# getLogger() without a name returns the root logger, so the level and the
# handler set below apply to every logger in the process.
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
formatter = logging.Formatter(FORMAT_STR)
handler = logging.StreamHandler()
handler.setFormatter(formatter)
logger.addHandler(handler)
# Emitted once at import time to show the effective level.
logging.info("Current log level is : %s", logging.getLevelName(logger.getEffectiveLevel()))
 25 | 
 26 | 
 27 | class MySqlLock(object):
 28 |     LOCK_SQL = "SELECT get_lock('{key}', {timeout}) FROM dual"
 29 |     UNLOCK_SQL = "SELECT release_lock('{key}') FROM dual"
 30 | 
 31 |     def __init__(self, lock_key=None, **kwargs):
 32 |         """
 33 |         :param lock_key:
 34 |         :param args:    参数与MySQLdb初始化参数一致.
 35 |         :param kwargs:  参数与MySQLdb初始化参数一致.
 36 |                     host='localhost'
 37 |                     user='test'
 38 |                     passwd='test'
 39 |                     db='test'
 40 |         """
 41 |         self.engine = create_engine('mysql+mysqldb://{user}:{pwd}@{host}:{port}/{db_name}?charset=utf8'.format(
 42 |             user=kwargs.pop('user', None),
 43 |             pwd=kwargs.pop('pwd', None),
 44 |             host=kwargs.pop('host', 'localhost'),
 45 |             port=kwargs.pop('pop', '3306'),
 46 |             db_name=kwargs.pop('db_name', None)
 47 |         ))
 48 |         self.lock_key = lock_key or '7ab18906739e4662ac01e69f5ebb7352'
 49 | 
 50 |     def _execute(self, sql):
 51 |         """
 52 |         MySQL数据库操作
 53 |         :param sql:
 54 |         :return: (1L,) --> tuple
 55 |         """
 56 |         res = -1
 57 |         try:
 58 |             e = self.engine.execute(sql)
 59 |             if e.rowcount <= 1:
 60 |                 res = e.rowcount
 61 |         except Exception, ex:
 62 |             logging.error("执行SQL\"%s\" 失败! 异常信息: %s", sql, str(ex))
 63 |         finally:
 64 |             pass
 65 |         return res
 66 | 
 67 |     def lock(self, timeout):
 68 |         """
 69 |         MySQL数据库加锁
 70 |         :param timeout:  超时时间
 71 |         :return:
 72 |         """
 73 |         # 加锁操作
 74 |         lk = self._execute(self.LOCK_SQL.format(key=self.lock_key, timeout=timeout))
 75 | 
 76 |         if lk == 0:
 77 |             logging.debug("锁'%s'已经被创建.", self.lock_key)
 78 |             return False
 79 |         elif lk == 1:
 80 |             logging.debug("创建锁'%s'." % self.lock_key)
 81 |             return True
 82 |         else:
 83 |             logging.error("获取锁失败!")
 84 |             return None
 85 | 
 86 |     def unlock(self):
 87 |         """
 88 |         释放MySQL锁.
 89 |         :return:
 90 |         """
 91 |         # 释放操作
 92 |         uk = self._execute(self.UNLOCK_SQL.format(key=self.lock_key))
 93 | 
 94 |         if uk == 0:
 95 |             logging.debug("释放锁'%s'失败(该锁被其他进程持有)" % self.lock_key)
 96 |             return False
 97 |         elif uk == 1:
 98 |             logging.debug("释放锁'%s'." % self.lock_key)
 99 |             return True
100 |         else:
101 |             logging.error("锁'%s'不存在." % self.lock_key)
102 |             return None
103 | 
104 | 
if __name__ == "__main__":
    demo_lock = MySqlLock(host='localhost', user='root', pwd='root', db_name='iaasms')
    acquired = demo_lock.lock(15)
    if not acquired:
        logging.error("获取锁失败,退出!")
        quit()

    # Stand-in for the cross-process work that the lock protects.
    time.sleep(5)
    # TODO something
    print('hello ok!')

    demo_lock.unlock()
118 | 


--------------------------------------------------------------------------------
/contributed_modules/mysql/sqlalchemy_/study_sqlalchemy.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | # @Time           : 17-8-26 下午2:48
  4 | # @Author         : Tom.Lee
  5 | # @File           : study_sqlalchemy.py
  6 | # @Product        : PyCharm
  7 | # @Docs           : 
  8 | # @Source         : sqlalchemy.sql.selectable.py
  9 | import time
 10 | from sqlalchemy import (
 11 |     Table, Column, MetaData, create_engine)
 12 | from sqlalchemy.engine.result import ResultProxy
 13 | from sqlalchemy.sql.sqltypes import (
 14 |     Unicode, INTEGER)
 15 | 
# Connection URL for the local demo database (MySQLdb driver, utf8 charset).
url = 'mysql+mysqldb://{user}:{pwd}@{host}:{port}/{db_name}?charset=utf8'.format(
    user='root',
    pwd='root',
    host='localhost',
    port='3306',
    db_name='iaasms'
)
# pool_recycle=3600 may be passed to create_engine as the connection timeout setting.
engine = create_engine(url)

# Table definition used by the CRUD demo functions below.
table = Table(
    'tom_test', MetaData(),
    Column('id', INTEGER, primary_key=True),
    Column('start_time', INTEGER, index=False),
    Column('last_time', INTEGER, nullable=False),
    Column('count', INTEGER, nullable=False),
    Column('region', Unicode(20, _warn_on_bytestring=False))
)

# Create the table at import time; the second positional argument is
# checkfirst, so an existing table is left untouched.
table.create(engine, True)
 37 | 
 38 | 
 39 | def _formatter_data(res):
 40 |     """
 41 |     sqlalchemy.engine.result.ResultProxy 对象数据提取
 42 | 
 43 |     res.cursor._rows   # 数据
 44 |     res._metadata.keys 或 res.cursor.description # 数据库表字段名
 45 |     :param res:
 46 |     :return: list
 47 |     """
 48 |     assert isinstance(res, ResultProxy)
 49 |     assert res.returns_rows
 50 |     rows = []
 51 |     for _row in res.cursor._rows:
 52 |         row = {}
 53 |         for index, column in enumerate(res._metadata.keys):
 54 |             row[column] = _row[index]
 55 |         rows.append(row)
 56 |     return rows
 57 | 
 58 | 
 59 | def _execute_success(res):
 60 |     """
 61 |     sqlalchemy.engine.result.ResultProxy 数据库修改状态
 62 | 
 63 |     res.returns_rows   # 是否返回数据
 64 |     res.rowcount 是否执行成功 1 success,0 error
 65 |     :param res:
 66 |     :return: boolean
 67 |     """
 68 |     assert isinstance(res, ResultProxy)
 69 |     return res.rowcount > 0
 70 | 
 71 | 
 72 | def insert():
 73 |     # 插入
 74 |     # sqlalchemy.exc.IntegrityError 主键冲突异常
 75 |     sql = table.insert().values(**{
 76 |         'id': 2,
 77 |         'start_time': time.time(),
 78 |         'last_time': time.time(),
 79 |         'count': 1,
 80 |         'region': 'test'
 81 |     })
 82 |     res = engine.execute(sql)
 83 |     print _execute_success(res)
 84 | 
 85 | 
 86 | def select():
 87 |     # 查询
 88 |     sql = table.select().where(table.c.id == 2)
 89 |     res = engine.execute(sql)
 90 |     print _formatter_data(res)
 91 | 
 92 | 
 93 | def update():
 94 |     # 修改
 95 |     sql = table.update().where(table.c.id == 1).values(count=9)
 96 |     res = engine.execute(sql)
 97 |     print _execute_success(res)
 98 | 
 99 | 
def delete():
    """Delete the row with id=2 and print whether a row was affected."""
    by_id = table.c.id == 2
    statement = table.delete().where(by_id)
    outcome = engine.execute(statement)
    print(_execute_success(outcome))
104 | 


--------------------------------------------------------------------------------
/contributed_modules/redis/README.md:
--------------------------------------------------------------------------------
1 | ## 安装
2 | 
3 | * ubuntu : `sudo pip install redis`
4 | 


--------------------------------------------------------------------------------
/contributed_modules/redis/redis_helper.py:
--------------------------------------------------------------------------------
 1 | # -*- encoding:utf-8 -*-
 2 | 
 3 | 
 4 | import redis
 5 | 
 6 | 
 7 | class RedisHelper(object):
 8 |     def __init__(self, port=6379, host='127.0.0.1'):
 9 |         self.port = port
10 |         self.host = host
11 |         self.__conn = redis.Redis(host=self.host, port=self.port)
12 | 
13 |     def set(self, key, value):
14 |         assert key
15 |         self.__conn.set(key, value)
16 |         return True
17 | 
18 |     def get(self, key):
19 |         assert key
20 |         return self.__conn.get(key)
21 | 
22 |     def keys(self, pattern='*'):
23 |         return self.__conn.keys(pattern)
24 | 
25 |     def delete(self, *keys):
26 |         return self.delete(keys)
27 | 
28 |     def subscribe(self, chanel):
29 |         assert chanel
30 |         pub = self.__conn.pubsub()
31 |         pub.subscribe(chanel)
32 |         if pub.parse_response():  # first validate connection
33 |             # return chanel
34 |             return pub
35 |         return None
36 | 
37 |     def publish(self, chanel, message):
38 |         assert chanel and message
39 |         self.__conn.publish(chanel, message)
40 |         return True
41 | 
42 | 
43 | """
44 | TEST
45 | """
if __name__ == '__main__':
    # Manual demo: publish once, subscribe, then print each message payload.
    helper = RedisHelper()
    helper.publish('fm001', 1)
    channel = helper.subscribe('fm001')
    print(channel)
    while True:
        response = channel.parse_response()
        print(response[2])
54 | 


--------------------------------------------------------------------------------
/contributed_modules/redis/redis_test.py:
--------------------------------------------------------------------------------
  1 | # -*- encoding:utf-8 -*-
  2 | 
  3 | import redis
  4 | 
# Shared client used by every helper in this module; created with the
# redis.Redis() defaults and printed once at import time.
redis_client = redis.Redis()
print redis_client
  7 | 
  8 | 
  9 | def add_str(k, v):
 10 |     """
 11 |     添加字符串
 12 |     :param k:键
 13 |     :param v:值
 14 |     :return:
 15 |     """
 16 |     redis_client.set(k, v)
 17 | 
 18 | 
 19 | def get_str(k):
 20 |     """
 21 |     获取字符串
 22 |     :param k:键
 23 |     :return:
 24 |     """
 25 |     return redis_client.get(k)
 26 | 
 27 | 
 28 | def add_llist(k, l):
 29 |     """lpush 倒序返回"""
 30 |     redis_client.lpush(k, l)
 31 | 
 32 | 
 33 | def add_rlist(k, l):
 34 |     """rpush 顺序返回"""
 35 |     redis_client.rpush(k, l)
 36 | 
 37 | 
 38 | def get_list(k, start=0, end=-1):
 39 |     """
 40 |     redis_client.lrange('list_descsort',0,-1)
 41 |     :param k:
 42 |     :param start:
 43 |     :param end:
 44 |     :return:
 45 |     """
 46 |     return redis_client.lrange(k, start, end)
 47 | 
 48 | 
 49 | def get_keys(pattern='*'):
 50 |     """
 51 |     :param pattern: 'list*'
 52 |     :return:
 53 |     """
 54 |     return redis_client.keys(pattern)
 55 | 
 56 | 
 57 | def delete_key(*keys):
 58 |     redis_client.delete(keys)
 59 | 
 60 | 
 61 | def redis_subscribe(chanel_name):
 62 |     """
 63 |     redis　订阅频道
 64 |     :param chanel_name:
 65 |     :return:
 66 |     """
 67 |     pub = redis_client.pubsub()
 68 |     pub.subscribe(chanel_name)
 69 |     if pub.parse_response():
 70 |         return pub
 71 |     else:
 72 |         return None
 73 | 
 74 | 
 75 | def redis_publish(chanel_name, **kwargs):
 76 |     """
 77 |     发布消息
 78 |     :param chanel_name:
 79 |     :param kwargs:
 80 |     :return:
 81 |     """
 82 |     redis_client.publish(chanel_name, kwargs)
 83 | 
 84 | 
 85 | # redis publish and subscribe
 86 | # publish message
 87 | '''
 88 | for i in range(10):
 89 |     redis_client.publish('fm101', 'hello i am %d' % i)
 90 | '''
 91 | # subscribe chanel
 92 | '''
 93 | pub = redis_client.pubsub()
 94 | pub.subscribe('fm101')
 95 | while True:
 96 |     print pub.parse_response()
 97 | '''
 98 | 
 99 | if __name__ == '__main__':
100 |     import time
101 | 
102 |     for i in range(10):
103 |         time.sleep(2)
104 |         redis_publish('fm001', k='hello')
105 | 


--------------------------------------------------------------------------------
/contributed_modules/requests/README.md:
--------------------------------------------------------------------------------
 1 | # requests
 2 | 
 3 | ## install
 4 | 
 5 | * ubuntu : `sudo pip install requests`
 6 | 
 7 | 
 8 | ## 使用代理
 9 | proxies={u'http': u'61.186.164.98:8080',}
10 | 参数字典中，key只能为'http',value 为ip:port; 协议为https会自动解析
11 | 


--------------------------------------------------------------------------------
/contributed_modules/requests/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # @Time    : 17-4-23 下午11:14
4 | # @Author  : tom.lee
5 | # @Site    : 
6 | # @File    : __init__.py.py
7 | # @Software: PyCharm


--------------------------------------------------------------------------------
/contributed_modules/requests/restful.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # @Time           : 17-9-2 上午11:32
 4 | # @Author         : Tom.Lee
 5 | # @CopyRight      : 2016-2017 OpenBridge by yihecloud
 6 | # @File           : restful.py
 7 | # @Product        : PyCharm
 8 | # @Docs           : 
 9 | # @Source         : 
10 | 
11 | import requests
12 | 
13 | 
def json_console_format(s):
    """Serialize ``s`` to JSON text indented by five spaces."""
    from json import dumps
    return dumps(s, indent=5)
17 | 
18 | 
class TestCase(object):
    """
    Minimal HTTP client for manual API testing.

    Every verb returns ``(status_code, body)``; the body is pretty-printed JSON
    when the response parses as JSON and the raw content otherwise.
    Certificate verification is off by default; callers may pass ``verify=``
    explicitly to override it.
    """

    @classmethod
    def _response(cls, res):
        """Turn a response object into ``(status_code, body)``."""
        try:
            return res.status_code, json_console_format(res.json())
        except Exception:
            # Not JSON (or formatting failed): fall back to the raw content.
            return res.status_code, res.content

    @staticmethod
    def _request_options(kwargs):
        """Return ``kwargs`` with ``verify`` defaulting to False unless the caller set it."""
        kwargs.setdefault('verify', False)
        return kwargs

    def get(self, url, params=None, **kwargs):
        res = requests.get(url=url, params=params, **self._request_options(kwargs))
        return self._response(res)

    def post(self, url, data=None, body=None, **kwargs):
        res = requests.post(url, data=data, json=body, **self._request_options(kwargs))
        return self._response(res)

    def put(self, url, data=None, body=None, **kwargs):
        res = requests.put(url, data=data, json=body, **self._request_options(kwargs))
        return self._response(res)

    def delete(self, url, **kwargs):
        res = requests.delete(url, **self._request_options(kwargs))
        return self._response(res)

    def head(self, url, headers=None, **kwargs):
        res = requests.head(url, headers=headers or {}, **self._request_options(kwargs))
        return self._response(res)

    def patch(self, url, data=None, body=None, **kwargs):
        res = requests.patch(url, data=data, json=body, **self._request_options(kwargs))
        return self._response(res)
50 | 
51 | 


--------------------------------------------------------------------------------
/contributed_modules/requests/test.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # @Time           : 17-7-25 下午3:08
 4 | # @Author         : Tom.Lee
 5 | # @CopyRight      : 2016-2017 OpenBridge by yihecloud
 6 | # @File           : test.py
 7 | # @Product        : PyCharm
 8 | 
 9 | from restful import TestCase
10 | 
11 | 
if __name__ == '__main__':
    # Manual smoke test against a locally running service.
    t = TestCase()
    base_url = 'http://0.0.0.0:9091'

    # GET /jobList (disabled)
    # print t.get(base_url + '/jobList')[1]
    # GET /jobs: print the response body (index 1 of the returned tuple)
    print t.get(base_url + '/jobs')[1]
    # POST /nodes with form data (disabled)
    # data = {'name': 'node-16', 'status': 'AVAILABLE'}
    # print t.post(base_url + '/nodes', data=data)[1]

    # PUT /jobs with form data (disabled)
    # data = {'job_id': 'node01-tick', 'status': 'pause'}
    # print t.put(base_url + '/jobs', data=data)[1]

    # DELETE /mailUsers with form data (disabled)
    # data = {'user_id': 5}
    # print t.delete(base_url + '/mailUsers', data=data)[1]
31 | 


--------------------------------------------------------------------------------
/contributed_modules/requests/utils.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # @Time           : 17-8-11 上午11:29
 4 | # @Author         : Tom.Lee
 5 | # @CopyRight      : 2016-2017 OpenBridge by yihecloud
 6 | # @File           : utils.py
 7 | # @Product        : PyCharm
 8 | 
 9 | 
10 | import requests
11 | from requests.packages.urllib3.exceptions import InsecureRequestWarning
12 | 
# Silence urllib3's InsecureRequestWarning for the whole process.
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
# Exception text that http_inspect compares against to print a pyOpenSSL upgrade hint.
openSSL_error = "'X509' object has no attribute '_x509'"
15 | 
16 | 
def http_inspect(func):
    """
    Decorator that converts any failure of ``func`` into this module's ConnectionError.

    Timeouts, other ``requests`` errors and every remaining exception are each
    re-raised as ConnectionError with a distinct message. The wrapper keeps the
    wrapped function's name and docstring.

    :param func: callable performing an HTTP request
    :return: the wrapped callable
    """
    import functools

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except requests.exceptions.Timeout:
            raise ConnectionError('连接超时')
        except requests.exceptions.RequestException:
            raise ConnectionError('请求失败')
        except Exception as e:
            # Compare the first exception argument instead of the deprecated
            # ``e.message`` attribute.
            if e.args[:1] == (openSSL_error,):
                print("""package error, please execute :
                pip install -U pyOpenSSL""")
            raise ConnectionError('连接失败')

    return wrapper
32 | 
33 | 
class ConnectionError(Exception):
    """Raised when connecting to the remote service fails."""
39 | 
40 | 
class RequestsUtils(object):
    """GET helper that applies shared headers, cookies, timeout, proxies and TLS settings."""

    user_agent = 'Mozilla/5.0 (X11; Linux x86_64) ' \
                 'AppleWebKit/537.36 (KHTML, like Gecko) ' \
                 'Ubuntu Chromium/50.0.2661.102 ' \
                 'Chrome/50.0.2661.102 Safari/537.36'

    def __init__(self, headers=None, cookies=None, timeout=3, proxies=None, verify=False):
        """
        :param headers: optional dict of request headers
        :param cookies: optional value sent as the ``Cookie`` header
        :param timeout: request timeout passed to requests
        :param proxies: optional proxies mapping passed to requests
        :param verify: TLS verification flag passed to requests
        """
        self.headers = headers
        self.cookies = cookies
        self.timeout = timeout
        self.proxies = proxies
        self.verify = verify

    def _make_headers(self):
        """Build the headers for one request without touching ``self.headers``."""
        # Work on a copy so the caller's dict is never modified.
        headers = dict(self.headers or {})
        if not headers.get('User-Agent'):
            headers['User-Agent'] = self.user_agent
        if self.cookies:
            headers['Cookie'] = self.cookies
        return headers

    @http_inspect
    def get(self, url, params=None):
        """
        Send a GET request.

        :param url: target URL
        :param params: optional query parameters
        :return: the ``requests`` response object
        """
        return requests.get(url,
                            params=params,
                            headers=self._make_headers(),
                            timeout=self.timeout,
                            proxies=self.proxies,
                            verify=self.verify)
70 | 
71 | 
if __name__ == '__main__':
    # Manual check against a host on the local network.
    client = RequestsUtils()
    print(client.get('http://192.168.1.111:8088/web'))
75 | 


--------------------------------------------------------------------------------
/crawlers/spider/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 |  pip install beautifulsoup4
3 |  docs: https://www.crummy.com/software/BeautifulSoup/bs4/doc/index.zh.html#
4 | """


--------------------------------------------------------------------------------
/crawlers/spider/downloader.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # @Time    : 17-4-19 下午10:26
 4 | # @Author  : tom.lee
 5 | # @Site    : 
 6 | # @File    : downloader.py
 7 | # @Software: PyCharm
 8 | import logging
 9 | import requests
10 | from requests.packages.urllib3.exceptions import InsecureRequestWarning
11 | 
12 | requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
13 | 
14 | 
15 | # from .tools import Dir
16 | 
17 | 
class HtmlDownloader(object):
    """Download pages with shared headers, cookies, timeout, proxies and TLS settings."""

    openSSL_error = "'X509' object has no attribute '_x509'"
    user_agent = 'Mozilla/5.0 (X11; Linux x86_64) ' \
                 'AppleWebKit/537.36 (KHTML, like Gecko) ' \
                 'Ubuntu Chromium/50.0.2661.102 ' \
                 'Chrome/50.0.2661.102 Safari/537.36'

    def __init__(self, headers=None, cookies=None, timeout=10, proxies=None, verify=False):
        """
        :param headers: optional dict of request headers
        :param cookies: optional value sent as the ``Cookie`` header
        :param timeout: request timeout passed to requests
        :param proxies: optional proxies mapping passed to requests
        :param verify: TLS verification flag passed to requests
        """
        self.headers = headers
        self.cookies = cookies
        self.timeout = timeout
        self.proxies = proxies
        self.verify = verify

    def _make_headers(self):
        """Build the headers for one request without touching ``self.headers``."""
        # Work on a copy so the caller's dict is never modified.
        headers = dict(self.headers or {})
        if not headers.get('User-Agent'):
            headers['User-Agent'] = self.user_agent
        if self.cookies:
            headers['Cookie'] = self.cookies
        return headers

    def _request(self, url):
        """
        Perform one GET request.

        :param url: page to fetch
        :return: ``(status_code, content)``; failures are logged and reported
                 as ``(502, None)`` for timeouts and ``(500, None)`` otherwise
        """
        try:
            resp = requests.get(
                url, headers=self._make_headers(), timeout=self.timeout,
                proxies=self.proxies, verify=self.verify)
            return resp.status_code, resp.content
        except requests.exceptions.Timeout:
            logging.error('requests timeout: %s' % url)
            return 502, None
        except requests.exceptions.RequestException:
            logging.error('requests RequestException: %s' % url)
            return 500, None
        except Exception as e:
            # Compare the first exception argument instead of the deprecated
            # ``e.message`` attribute.
            if e.args[:1] == (self.openSSL_error,):
                print("""package error, please execute :
                pip install -U pyOpenSSL""")
            else:
                logging.error('requests unKnow error: %s' % url)
            return 500, None

    def download(self, url, retry=-1):
        """
        Fetch a page, retrying on failure.

        :param url: page to fetch
        :param retry: number of extra attempts after a non-200 result;
                      values <= 0 mean a single attempt
        :return: the page content of the first 200 response, or '' when every
                 attempt failed
        """
        code, content = self._request(url)
        # Keep the result of each retry; it used to be thrown away.
        while code != 200 and retry > 0:
            retry -= 1
            code, content = self._request(url)
        return content if code == 200 else ''
70 | 
71 | 


--------------------------------------------------------------------------------
/crawlers/spider/main.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # @Time    : 17-4-21 上午12:36
 4 | # @Author  : tom.lee
 5 | # @Site    : 
 6 | # @File    : main.py
 7 | # @Software: PyCharm
 8 | from downloader import HtmlDownloader
 9 | from parser import HtmlParser
10 | from urlsmanager import URLSManager
11 | from writer import FileWriter
12 | from tools import Decorator
13 | from proxypools import Proxy, ProxiesPool
14 | 
15 | 
class SpiderWorker(object):
    """Crawl pages starting from one URL and record each page's title and address."""

    def __init__(self, url, size=20):
        """
        :param url: start URL, also used as the parser base URL and the URL pattern
        :param size: size passed to URLSManager
        """
        self.url = url
        self.pool = ProxiesPool()
        self.parser = HtmlParser(url)
        self.url_manager = URLSManager(url_pattern=url, size=size)
        self.writer = FileWriter()

    @Decorator.time
    def start(self):
        """Process queued URLs until the manager has none left, then flush the writer."""
        manager, parser, writer = self.url_manager, self.parser, self.writer
        manager.add_url(self.url)
        while manager.has_next():
            # A fresh downloader per page, each with a proxy drawn from the pool.
            proxies = self.pool.get_proxy_ip()
            downloader = HtmlDownloader(proxies=proxies)
            page_url = manager.get_url()
            html = downloader.download(page_url)
            anchors = parser.simple_tags(html, 'a', attributes=['href'])
            manager.add_urls([anchor.get('href') for anchor in anchors])
            title_tag = parser.element(html, 'title')
            if title_tag:
                title = title_tag.getText()
            else:
                title = 'unknown'
            writer.load_data('[%s] %s' % (title, page_url))
        writer.writer()
37 | 
# NOTE(review): these statements run whenever the module is imported; there is
# no __main__ guard.
# Start the proxy updater, then crawl from the start URL.
p=Proxy()
p.start()
SpiderWorker('http://www.jikexueyuan.com/').start()
# Proxy.run loops while is_running, so this presumably blocks until the worker
# is stopped elsewhere -- confirm against ReThread.
p.join()


--------------------------------------------------------------------------------
/crawlers/spider/parser.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # @Time    : 17-4-19 下午10:10
 4 | # @Author  : tom.lee
 5 | # @Site    : 解析器
 6 | # @File    : parser.py
 7 | # @Software: PyCharm
 8 | 
 9 | 
10 | 
11 | import urlparse
12 | 
13 | import bs4
14 | 
15 | 
class HtmlParser(object):
    """
    HTML parser built on BeautifulSoup; subclass it for more elaborate needs.
    """
    url_filed = 'href'
    parser = 'html.parser'
    encoding = 'utf-8'

    def __init__(self, base_url=None):
        """
        :param base_url: when set, URL attributes returned by filtered
                         lookups are joined against it
        """
        self.__base_url = base_url

    def simple_tags(self, data, tag=None, patterns=None, attributes=None):
        """
        Find tags with a single lookup.

        :param data: markup to parse
        :param tag: tag name
        :param patterns: attribute filter passed to ``find_all``
        :param attributes: attribute names to keep in each result dict
        :return: list of dicts, one per matching tag
        """
        tags = self.__parser_tags(data, tag, patterns)
        return self.__tags(tags, attributes)

    def multilevel_tags(self, data, multilevel_patterns=None, attributes=None):
        """
        Find tags by applying several lookups in sequence.

        Each entry of ``multilevel_patterns`` is a one-item ``{tag: patterns}``
        dict; every lookup runs on the matches of the previous one. The
        caller's list is left unmodified.

        :param data: markup to parse
        :param multilevel_patterns: list of ``{tag: patterns}`` dicts
        :param attributes: attribute names to keep in each result dict
        :return: ``data`` itself when no patterns are given, otherwise a list
                 of dicts for the tags matched by the last lookup
        """
        if not multilevel_patterns:
            return data

        for tag_patterns in list(multilevel_patterns):
            tag, patterns = list(tag_patterns.items())[0]
            data = self.__parser_tags(data, tag, patterns)

        return self.__tags(data, attributes)

    def element(self, data, tag=None, patterns=None):
        """
        Return the first tag element matching the lookup, or None.
        """
        elements = self.elements(data, tag, patterns)
        return elements[0] if elements else None

    def elements(self, data, tag=None, patterns=None):
        """Return every tag element matching the lookup."""
        return self.__parser_tags(data, tag, patterns)

    def __tags(self, data, attributes=None):
        # One dict per tag: its attributes plus the tag text under 'text'.
        tags = [dict(tag_.attrs, text=tag_.getText()) for tag_ in data]

        if not attributes:
            return tags

        # Keep only the requested keys; iterate over a snapshot of the keys
        # because entries are removed while filtering.
        for tag_attr in tags:
            for k in list(tag_attr):
                if k not in attributes:
                    tag_attr.pop(k)

        if self.__base_url:
            return self.__format_url(tags)

        return tags

    def __parser_tags(self, data, tag=None, patterns=None):
        return self.__data_parser(data).find_all(tag, patterns)

    def __data_parser(self, data):
        return bs4.BeautifulSoup(str(data), self.parser, from_encoding=self.encoding)

    def __format_url(self, maps):
        # Join every non-empty URL attribute against the base URL.
        for m in maps:
            if not m.get(self.url_filed):
                continue
            m[self.url_filed] = urlparse.urljoin(self.__base_url, m.get(self.url_filed))
        return maps
91 | 


--------------------------------------------------------------------------------
/crawlers/spider/proxypools.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | # @Time    : 17-4-22 下午11:04
  4 | # @Author  : tom.lee
  5 | # @Site    : 代理池
  6 | # @File    : proxyspools.py
  7 | # @Software: PyCharm
  8 | import logging
  9 | import re
 10 | import threading
 11 | import time
 12 | import random
 13 | 
 14 | from tools import ReThread
 15 | from downloader import HtmlDownloader
 16 | from parser import HtmlParser
 17 | 
 18 | lock = threading.Lock()
 19 | 
 20 | 
 21 | class ProxiesPool(object):
 22 |     __instance = None
 23 |     __pool = []
 24 | 
 25 |     def __new__(cls, *args):
 26 |         if not ProxiesPool.__instance:
 27 |             try:
 28 |                 lock.acquire()
 29 |                 if not ProxiesPool.__instance:
 30 |                     ProxiesPool.__instance = object.__new__(cls, *args)
 31 |             except Exception, e:
 32 |                 logging.error('ProxiesPool: init error : %s' % e)
 33 |             finally:
 34 |                 lock.release()
 35 |         return ProxiesPool.__instance
 36 | 
 37 |     @property
 38 |     def pool(self):
 39 |         return self.__pool
 40 | 
 41 |     def add(self, ip):
 42 |         if not ip:
 43 |             return
 44 |         self.__pool.append(ip)
 45 | 
 46 |     def get_proxy_ip(self):
 47 |         if self.pool:
 48 |             proxies = self.pool[random.randint(0, len(self.pool) - 1)]
 49 |         else:
 50 |             proxies = None
 51 |         print proxies
 52 |         return proxies
 53 | 
 54 |     def __setattr__(self, key, value):
 55 |         pass
 56 | 
 57 |     def __dict__(self):
 58 |         pass
 59 | 
 60 | 
 61 | class Proxy(ReThread):
 62 |     proxy_site = 'http://www.xicidaili.com/nn'
 63 | 
 64 |     def run(self):
 65 |         while self.is_running:
 66 |             self.__update_proxy_pool()
 67 |             time.sleep(60 * 15)
 68 | 
 69 |     @staticmethod
 70 |     def __re_number(s):
 71 |         if not s:
 72 |             return 0
 73 |         return float('%0.2f' % float(re.sub('[^\d+.\d+$]', '', s)))
 74 | 
 75 |     def __update_proxy_pool(self):
 76 |         downloader = HtmlDownloader()
 77 |         proxy_pool = ProxiesPool()
 78 |         parser = HtmlParser()
 79 |         data = downloader.download(self.proxy_site)
 80 |         speed_times = parser.multilevel_tags(data, [{'tr': None}, {'div': {'class': 'bar'}}])
 81 |         ip_data = parser.elements(data, 'tr')[1:]
 82 |         speed = speed_times[::2]
 83 |         times = speed_times[1::2]
 84 |         for i, ip in enumerate(ip_data):
 85 |             d = {}
 86 |             for j, value in enumerate(filter(lambda x: x, ip_data[i].text.split('\n'))):
 87 |                 if j == 0:
 88 |                     d['ip'] = value
 89 |                 elif j == 1:
 90 |                     d['port'] = value
 91 |                 continue
 92 |             if len(d.keys()) != 2:
 93 |                 continue
 94 |             if self.__re_number(speed[i].get('title')) > 1 \
 95 |                     or self.__re_number(times[i].get('title')) > 1:
 96 |                 continue
 97 | 
 98 |             proxy_pool.add({'http': '%s:%s' % (d.get('ip'), d.get('port'))})
 99 | 
100 | 


--------------------------------------------------------------------------------
/crawlers/spider/tools.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | # @Time    : 17-4-19 下午10:49
  4 | # @Author  : tom.lee
  5 | # @Site    : 
  6 | # @File    : tools.py
  7 | # @Software: PyCharm
  8 | import logging
  9 | import os
 10 | import threading
 11 | import time
 12 | 
 13 | 
 14 | class Decorator(object):
 15 |     @staticmethod
 16 |     def time(func):
 17 |         def wrapper(*args, **kwargs):
 18 |             start = time.time()
 19 |             result = func(*args, **kwargs)
 20 |             logging.warning(
 21 |                 '*******方法[%s]消耗时间%d s' %
 22 |                 (func.__name__, time.time() - start))
 23 |             return result
 24 | 
 25 |         return wrapper
 26 | 
 27 | 
class Constants(object):
    """String constants shared by the spider modules."""
    # Parser name; presumably handed to the HTML parsing library -- TODO confirm.
    parser = 'html.parser'
    # Text encoding name.
    encoding = 'utf-8'
    # Attribute name 'href'; presumably the attribute links are read from -- TODO confirm.
    url_filed = 'href'
 32 | 
 33 | 
 34 | class Dir(object):
 35 |     @staticmethod
 36 |     def create_dir(path):
 37 |         if not os.path.exists(path):
 38 |             try:
 39 |                 os.makedirs(path)
 40 |             except Exception, e:
 41 |                 print u'文件夹%s　创建失败;\n %s' % (path, e)
 42 |         else:
 43 |             print u'文件夹%s　已经存在' % path
 44 | 
 45 |     @staticmethod
 46 |     def parent_dir(path):
 47 |         path = path.rstrip('/')
 48 |         return '/'.join(path.split('/')[0:-1])
 49 | 
 50 |     @staticmethod
 51 |     def del_dir(path):
 52 |         assert os.path.exists(path) and os.path.isdir(path)
 53 |         for root, dirs, files in os.walk(path, topdown=False):
 54 |             for name in files:
 55 |                 os.remove(os.path.join(root, name))
 56 |             for name in dirs:
 57 |                 os.rmdir(os.path.join(root, name))
 58 |         os.rmdir(path)
 59 | 
 60 |     @staticmethod
 61 |     def create_file(name, mode='r', data=""):
 62 |         try:
 63 |             parent_path = Dir.parent_dir(name)
 64 |             if parent_path and not os.path.exists(parent_path):
 65 |                 Dir.create_dir(parent_path)
 66 |             with open(name, mode)as f:
 67 |                 f.write(data)
 68 |         except Exception, e:
 69 |             print u'%s 创建失败\n异常：%s' % (name, e)
 70 | 
 71 | 
 72 | class ReThread(threading.Thread):
 73 |     def __init__(self, *args, **kwargs):
 74 |         super(ReThread, self).__init__(*args, **kwargs)
 75 |         self.__flag = threading.Event()  # 用于暂停线程的标识
 76 |         self.__flag.set()  # 设置为True
 77 |         self.__running = threading.Event()  # 用于停止线程的标识
 78 |         self.__running.set()  # 将running设置为True
 79 | 
 80 |     @property
 81 |     def is_running(self):
 82 |         """
 83 |         获取运行标志
 84 |         :return: True/False
 85 |         """
 86 |         return self.__running.isSet()
 87 | 
 88 |     def run(self):
 89 |         """
 90 |         使用while 循环,使用self.is_running 来获取运行标志位
 91 |         """
 92 |         pass
 93 | 
 94 |     def stop(self):
 95 |         """
 96 |         设置为False, 让线程阻塞
 97 |         """
 98 |         self.__flag.clear()
 99 | 
100 |     def resume(self):
101 |         """
102 |         设置为True, 让线程停止阻塞
103 |         """
104 |         self.__flag.set()
105 | 
106 |     def exit(self):
107 |         """
108 |         暂停标志设置为True
109 |         运行标志设置为False
110 |         """
111 |         self.__flag.set()
112 |         self.__running.clear()
113 | 


--------------------------------------------------------------------------------
/crawlers/spider/urlsmanager.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # @Time    : 17-4-19 下午10:18
 4 | # @Author  : tom.lee
 5 | # @Site    : 
 6 | # @File    : urlsmanager.py
 7 | # @Software: PyCharm
 8 | import urlparse
 9 | 
10 | 
class URLSManager(object):
    """Track which URLs are waiting to be crawled and which were handed out.

    :param url_pattern: optional URL; when given, only URLs whose network
        location matches the pattern's are accepted.
    :param size: optional cap on the number of processed URLs; once it is
        reached the pending set is emptied and new URLs are rejected.
    """

    def __init__(self, url_pattern=None, size=None):
        self.url_pattern = url_pattern
        self.size = size
        self.pending_urls = set()
        self.processed_urls = set()

    def has_next(self):
        """Return True while at least one URL is pending."""
        return len(self.pending_urls) > 0

    def add_url(self, url):
        """Queue ``url`` unless it is empty, off-site, a duplicate, or over the cap.

        A trailing ``/`` is stripped before any comparison.
        """
        if not url:
            return
        url = url.rstrip('/')
        if self.url_pattern and urlparse.urlparse(
                self.url_pattern).netloc != urlparse.urlparse(url).netloc:
            return
        # Two O(1) lookups instead of building a throwaway union set on
        # every call.
        if url in self.pending_urls or url in self.processed_urls:
            return
        if self.size and len(self.processed_urls) >= self.size:
            self.pending_urls = set()
            return
        self.pending_urls.add(url)

    def add_urls(self, urls):
        """Queue every URL in ``urls`` (an empty/None iterable is ignored)."""
        if not urls:
            return
        for url in urls:
            self.add_url(url)

    def get_url(self):
        """Remove an arbitrary pending URL, mark it processed and return it.

        :raises KeyError: if no URL is pending.
        """
        url = self.pending_urls.pop()
        self.processed_urls.add(url)
        return url
44 | 


--------------------------------------------------------------------------------
/crawlers/spider/writer.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # @Time    : 17-4-21 上午12:17
 4 | # @Author  : tom.lee
 5 | # @Site    : 
 6 | # @File    : writer.py
 7 | # @Software: PyCharm
 8 | import sys
 9 | 
10 | reload(sys)
11 | sys.setdefaultencoding('utf8')
12 | 
13 | 
class FileWriter(object):
    """Collect records in memory and dump them to a file in one go.

    :param file_name: output path; defaults to ``data.txt``.
    """

    def __init__(self, file_name=None):
        self.file_name = file_name or 'data.txt'
        self._data = []

    def load_data(self, data):
        """Buffer one record; falsy values are ignored."""
        if not data:
            return
        self._data.append(data)

    def writer(self):
        """Write every buffered record to ``file_name``, each followed by a blank line.

        The file is opened in ``'wb+'`` mode, so existing content is
        replaced.
        """
        # The with-block closes the handle even if a write raises.
        with open(self.file_name, 'wb+') as f:
            for d in self._data:
                f.write('%s\n\n' % d)
28 | 


--------------------------------------------------------------------------------
/data_analysis/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # @Time    : 2017/7/23 12:09
4 | # @Author  : Tom.lee
5 | # @Site    : 
6 | # @File    : __init__.py.py
7 | # @Software: PyCharm
8 | 
9 | 


--------------------------------------------------------------------------------
/data_analysis/academic_concept/matrix_product.md:
--------------------------------------------------------------------------------
 1 | # 矩阵乘法
 2 | > 矩阵相乘最重要的方法是一般矩阵乘积。
 3 | 它只有在第一个矩阵的列数（column）和第二个矩阵的行数（row）**相同**时才有意义。
 4 | 一般单指矩阵乘积时，指的便是一般矩阵乘积。一个m×n的矩阵就是m×n个数排成m行n列的一个数阵。
 5 | 由于它把许多数据紧凑的集中到了一起，所以有时候可以简便地表示一些复杂的模型。
 6 | 
 7 | ## 定义
 8 | > 设A为 `m*p` 的矩阵，B为 `p*n` 的矩阵，那么称 `m*n` 的矩阵C为矩阵A与B的乘积，
 9 | 记作 `C=AB` ，其中矩阵C中的第 `i` 行第 `j` 列元素可以表示为：$c_{ij} = \sum_{k=1}^{p} a_{ik} b_{kj}$
10 | 
11 | 


--------------------------------------------------------------------------------
/data_analysis/study_matplotlib/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # @Time    : 2017/7/24 21:46
4 | # @Author  : Tom.lee
5 | # @Site    : 
6 | # @File    : __init__.py.py
7 | # @Software: PyCharm
8 | 
9 | 


--------------------------------------------------------------------------------
/data_analysis/study_matplotlib/graphs/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # @Time    : 2017/7/30 20:47
4 | # @Author  : Tom.lee
5 | # @Site    : 
6 | # @File    : __init__.py.py
7 | # @Software: PyCharm
8 | 
9 | 


--------------------------------------------------------------------------------
/data_analysis/study_matplotlib/graphs/graphs_histogram.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # @Time    : 2017/7/30 20:49
 4 | # @Author  : Tom.lee
 5 | # @Site    : 
 6 | # @File    : graphs_histogram.py
 7 | # @Software: PyCharm
 8 | 
 9 | """
10 | 直方图
11 | """
12 | import matplotlib.pyplot as plt
13 | import numpy as np
14 | 
N = 5
# Scores of the male students
menMeans = (20, 35, 30, 35, 27)
# Scores of the female students
womenMeans = (25, 32, 34, 20, 25)
# Error-bar sizes passed as yerr for each series
menStd = (2, 3, 4, 1, 2)
womenStd = (3, 5, 2, 3, 3)
ind = np.arange(N)  # the x locations for the groups
width = 0.35  # the width of the bars: can also be len(x) sequence

p1 = plt.bar(ind, menMeans, width, color='#d62728', yerr=menStd)
# bottom=menMeans stacks the second series on top of the first
p2 = plt.bar(ind, womenMeans, width, bottom=menMeans, yerr=womenStd)

plt.ylabel('Scores')
plt.title('Scores by group and gender')
plt.xticks(ind, ('G1', 'G2', 'G3', 'G4', 'G5'))  # set the x-axis ticks
plt.yticks(np.arange(0, 81, 5))  # set the y-axis ticks
plt.legend((p1[0], p2[0]), ('Men', 'Women'))
# Relative path: resolved against the current working directory
plt.savefig("../save_file/graphs_histogram.png")
plt.show()
35 | 


--------------------------------------------------------------------------------
/data_analysis/study_matplotlib/graphs/graphs_quadratic.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # @Time    : 2017/7/30 1:38
 4 | # @Author  : Tom.lee
 5 | # @File    : graphs_quadratic.py
 6 | # @Software: PyCharm
 7 | 
 8 | 
 9 | """
10 | numpy 多项式 与 matplotlib 二次函数折线图
11 | """
12 | 
13 | import warnings
14 | 
15 | import matplotlib.pyplot as plt
16 | import numpy as np
17 | 
18 | # 如果最小二乘拟合中的矩阵是秩不足，则引发警告。只有在满 == False时，才会发出警告。
19 | # 警告可以通过以下方式关闭：
20 | warnings.simplefilter('ignore', RuntimeWarning)
21 | warnings.simplefilter('ignore', np.RankWarning)
22 | 
23 | 
def foo(x0):
    """Evaluate a piecewise-linear triangular pulse at ``x0``.

    The pulse rises linearly from 0 to 1 on [0, 0.4], falls linearly back to
    0 on (0.4, 0.6) and is 0 on [0.6, 1]. Inputs greater than 1 are reduced
    modulo 1 first.

    :param x0: scalar abscissa.
    :return: the pulse value at ``x0``.
    """
    c0, hc, c = 0.4, 1.0, 0.6
    if x0 > 1:
        x0 = float(x0) % 1.0
    # Compare the parameter itself: the module-level array ``x`` cannot be
    # used in a boolean test.
    if x0 <= c0:
        k = hc / c0
        return k * x0
    elif x0 < c:
        k = (hc - 0) / (c0 - c)
        return k * (x0 - c)
    else:
        return 0
38 | 
39 | 
40 | # x 的取值 0到2 范围取50个点
41 | x = np.linspace(0, 2, 50, dtype=np.float64)
42 | 
43 | # 计算对应的y值，并转换为nparray 对象
44 | y = np.array(map(foo, x)).astype(np.float64)
45 | 
46 | print "x值：", x
47 | print "y值：", y
48 | 
49 | m = np.polyfit(x, y, 20)  # 调整拟合多项式的度为20，生成多项式参数
50 | print "多项式参数：", m
51 | 
52 | 
53 | # 一维多项式
54 | p1 = np.poly1d(m)  # 根据多项式参数构造一维多项式
55 | print "一维多项式：", p1
56 | 
57 | # 根据x使用多项式求解y值，
58 | # yp=np.polyval(np.polyfit(x, y, 20),x)，x可以是单个值也可以是列表
59 | y1 = p1(x)
60 | print "一维多项式根据x的计算值：", y1
61 | 
62 | # 以点（“．”）绘制实际值折线
63 | plot1 = plt.plot(x, y, '.', label='original values')
64 | # 以线（“r”）绘制最小二乘拟合折线
65 | plot2 = plt.plot(x, y1, 'r', label='polyfit values')
66 | 
67 | # x轴描述
68 | plt.xlabel('X')
69 | # Y轴描述
70 | plt.ylabel('Y')
71 | # plt.legend(loc=1)  # 指定legend的位
72 | # 标题
73 | plt.title('y = kx (0<x<0.6) | y = 0 (0.6<x<=1)')
74 | 
75 | # 保存文件　eps, pdf, pgf, png, ps, raw, rgba, svg, svgz.
76 | plt.savefig("../save_file/graphs_quadratic.png")
77 | plt.show()
78 | 


--------------------------------------------------------------------------------
/data_analysis/study_matplotlib/graphs/graphs_trigonometric.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # @Time           : 17-7-31 下午2:43
 4 | # @Author         : Tom.Lee
 5 | # @CopyRight      : 2016-2017
 6 | # @File           : graphs_trigonometric.py
 7 | # @Product        : PyCharm
 8 | 
 9 | import matplotlib.pyplot as plt
10 | import numpy as np
11 | 
12 | 
# Plot of the sine and cosine functions
x = np.linspace(-np.pi, np.pi, 256, endpoint=True)

# Compute the function values
y_cos, y_sin = np.cos(x), np.sin(x)

# Set the visible x and y ranges, with a 10% margin around the data
plt.xlim(x.min() * 1.1, x.max() * 1.1)
plt.ylim(y_sin.min() * 1.1, y_sin.max() * 1.1)

# Set the tick values
plt.xticks(np.linspace(-np.pi, np.pi, 5, endpoint=True))
plt.yticks(np.linspace(-1, 1, 11, endpoint=True))

# Position the axes lines (spines)
ax = plt.subplot(1, 1, 1)
# Hiding a spine needs the colour string 'none'; passing None does not
# make the line invisible.
ax.spines['right'].set_color('none')
ax.spines['top'].set_color('none')
# Move the remaining two spines so they cross at the data origin
ax.spines['bottom'].set_position(('data', 0))
ax.spines['left'].set_position(('data', 0))
ax.xaxis.set_ticks_position('bottom')
ax.yaxis.set_ticks_position('left')
# Draw the curves, setting line colour and width
plt.plot(x, y_sin, color='red', linewidth=2.5, label='sin(x)')
plt.plot(x, y_cos, color='blue', linewidth=2.5, label='cos(x)')
plt.legend(loc='upper left', frameon=False)
plt.title('trigonometric function')
# Relative path: resolved against the current working directory
plt.savefig("../save_file/graphs_trigonometric.png")
plt.show()
44 | 


--------------------------------------------------------------------------------
/data_analysis/study_matplotlib/matplotlib_2d.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # @Time           : 17-7-26 下午9:51
 4 | # @Author         : Tom.Lee
 5 | # @CopyRight      : 2016-2017 OpenBridge by yihecloud
 6 | # @File           : matplotlib_2d.py
 7 | # @Product        : PyCharm
 8 | import matplotlib.pyplot as plt
 9 | import numpy as np
10 | 
11 | x = np.linspace(-2 * np.pi, 2 * np.pi, 100)
12 | xx = x + 1j * x[:, np.newaxis]  # a + ib over complex plane
13 | out = np.exp(xx)
14 | 
15 | plt.subplot(121)
16 | plt.imshow(np.abs(out))
17 | 
18 | # extent = [-2 * np.pi, 2 * np.pi, -2 * np.pi, 2 * np.pi]
19 | plt.title('Magnitude of exp(x)')
20 | 
21 | plt.subplot(122)
22 | plt.imshow(np.angle(out))
23 | 
24 | # extent = [-2 * np.pi, 2 * np.pi, -2 * np.pi, 2 * np.pi]
25 | plt.title('Phase (angle) of exp(x)')
26 | plt.show()
27 | 


--------------------------------------------------------------------------------
/data_analysis/study_matplotlib/png/numpy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tomoncle/Python-notes/ce675486290c3d1c7c2e4890b57e3d0c8a1228cc/data_analysis/study_matplotlib/png/numpy.png


--------------------------------------------------------------------------------
/data_analysis/study_matplotlib/save_file/graphs_histogram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tomoncle/Python-notes/ce675486290c3d1c7c2e4890b57e3d0c8a1228cc/data_analysis/study_matplotlib/save_file/graphs_histogram.png


--------------------------------------------------------------------------------
/data_analysis/study_matplotlib/save_file/graphs_quadratic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tomoncle/Python-notes/ce675486290c3d1c7c2e4890b57e3d0c8a1228cc/data_analysis/study_matplotlib/save_file/graphs_quadratic.png


--------------------------------------------------------------------------------
/data_analysis/study_matplotlib/save_file/graphs_trigonometric.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tomoncle/Python-notes/ce675486290c3d1c7c2e4890b57e3d0c8a1228cc/data_analysis/study_matplotlib/save_file/graphs_trigonometric.png


--------------------------------------------------------------------------------
/data_analysis/study_matplotlib/test.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # @Time    : 2017/7/30 21:02
 4 | # @Author  : Tom.lee
 5 | # @Site    : 
 6 | # @File    : trigonometric.py
 7 | # @Software: PyCharm
 8 | import numpy as np
 9 | import matplotlib as mpl
10 | import matplotlib.pyplot as plt
11 | 
12 | mpl.rcParams['axes.titlesize'] = 20
13 | mpl.rcParams['xtick.labelsize'] = 16
14 | mpl.rcParams['ytick.labelsize'] = 16
15 | mpl.rcParams['axes.labelsize'] = 16
16 | mpl.rcParams['xtick.major.size'] = 0
17 | mpl.rcParams['ytick.major.size'] = 0
18 | 
19 | plt.xticks()
20 | 
21 | # 包含了狗，猫和猎豹的最高奔跑速度，还有对应的可视化颜色
22 | speed_map = {
23 |     'dog': (48, '#7199cf'),
24 |     'cat': (45, '#4fc4aa'),
25 |     'cheetah': (120, '#e1a7a2')
26 | }
27 | 
28 | # 整体图的标题
29 | fig = plt.figure('Bar chart & Pie chart')
30 | 
31 | # 在整张图上加入一个子图，121的意思是在一个1行2列的子图中的第一张
32 | ax = fig.add_subplot(121)
33 | ax.set_title('Running speed - bar chart')
34 | 
35 | # 生成x轴每个元素的位置
36 | xticks = np.arange(3)
37 | 
38 | # 定义柱状图每个柱的宽度
39 | bar_width = 0.5
40 | 
41 | # 动物名称
42 | animals = speed_map.keys()
43 | 
44 | # 奔跑速度
45 | speeds = [x[0] for x in speed_map.values()]
46 | 
47 | # 对应颜色
48 | colors = [x[1] for x in speed_map.values()]
49 | 
50 | # 画柱状图，横轴是动物标签的位置，纵轴是速度，定义柱的宽度，同时设置柱的边缘为透明
51 | bars = ax.bar(xticks, speeds, width=bar_width, edgecolor='none')
52 | 
53 | # 设置y轴的标题
54 | ax.set_ylabel('Speed(km/h)')
55 | 
56 | # x轴每个标签的具体位置，设置为每个柱的中央
57 | ax.set_xticks(xticks+bar_width/2)
58 | 
59 | # 设置每个标签的名字
60 | ax.set_xticklabels(animals)
61 | 
62 | # 设置x轴的范围
63 | ax.set_xlim([bar_width/2-0.5, 3-bar_width/2])
64 | 
65 | # 设置y轴的范围
66 | ax.set_ylim([0, 125])
67 | 
68 | # 给每个bar分配指定的颜色
69 | for bar, color in zip(bars, colors):
70 |     bar.set_color(color)
71 | 
72 | # 在122位置加入新的图
73 | ax = fig.add_subplot(122)
74 | ax.set_title('Running speed - pie chart')
75 | 
76 | # 生成同时包含名称和速度的标签
77 | labels = ['{}\n{} km/h'.format(animal, speed) for animal, speed in zip(animals, speeds)]
78 | 
79 | # 画饼状图，并指定标签和对应颜色
80 | ax.pie(speeds, labels=labels, colors=colors)
81 | 
82 | plt.show()


--------------------------------------------------------------------------------
/data_analysis/study_mlab/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # @Time    : 2017/7/24 21:31
4 | # @Author  : Tom.lee
5 | # @Site    : 
6 | # @File    : __init__.py.py
7 | # @Software: PyCharm
8 | 
9 | 


--------------------------------------------------------------------------------
/data_analysis/study_mlab/mlab_3d.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # @Time    : 2017/7/24 21:32
 4 | # @Author  : Tom.lee
 5 | # @Site    : 
 6 | # @File    : mlab_a.py
 7 | # @Software: PyCharm
 8 | 
 9 | 
def surface_3d():
    """
    Use Mayavi to draw a 2-D array as a 3-D surface: z = x * exp(-x**2 - y**2)
    :return:
    """
    import numpy as np
    # create data: 20 points from -2 to 2 along each axis
    x, y = np.ogrid[-2:2:20j, -2:2:20j]
    z = x * np.exp(- x ** 2 - y ** 2)

    # view it
    from mayavi import mlab

    # Draw a surface in 3-D space
    pl = mlab.surf(x, y, z, warp_scale="auto")

    # Add coordinate axes to the 3-D scene
    mlab.axes(xlabel='x', ylabel='y', zlabel='z')

    # Add an outline box around the surface
    mlab.outline(pl)

    mlab.show()
33 | 
34 | 
def surface_spherical_harmonic():
    """Display a closed 3-D mesh whose radius varies with both angles.

    The radius ``r`` is a sum of powers of sines and cosines of ``phi`` and
    ``theta``; the mesh is built from spherical-style coordinates and shown
    with Mayavi.
    """
    # Create the data.
    from numpy import pi, sin, cos, mgrid
    dphi, dtheta = pi / 250.0, pi / 250.0
    # phi covers [0, pi], theta covers [0, 2*pi]; the extra 1.5 steps keep
    # the end point inside the half-open slice.
    [phi, theta] = mgrid[0:pi + dphi * 1.5:dphi, 0:2 * pi + dtheta * 1.5:dtheta]
    # Frequencies (even indices) and exponents (odd indices) used in r
    m0 = 4
    m1 = 3
    m2 = 2
    m3 = 3
    m4 = 6
    m5 = 2
    m6 = 6
    m7 = 4
    r = sin(m0 * phi) ** m1 + cos(m2 * phi) ** m3 + sin(m4 * theta) ** m5 + cos(m6 * theta) ** m7
    x = r * sin(phi) * cos(theta)
    y = r * cos(phi)
    z = r * sin(phi) * sin(theta)

    # View it.
    from mayavi import mlab
    mlab.mesh(x, y, z)
    mlab.show()
57 | 
58 | 
def test_plot3d():
    """Generates a pretty set of lines.

    Draws one closed 3-D curve as a thin tube, coloured by ``sin(mu)``, and
    opens the Mayavi window.
    """
    import numpy
    from mayavi import mlab

    n_mer, n_long = 6, 11
    pi = numpy.pi
    dphi = pi / 1000.0
    # The extra half step keeps the end point 2*pi inside the range.
    phi = numpy.arange(0.0, 2 * pi + 0.5 * dphi, dphi)
    mu = phi * n_mer
    x = numpy.cos(mu) * (1 + numpy.cos(n_long * mu / n_mer) * 0.5)
    y = numpy.sin(mu) * (1 + numpy.cos(n_long * mu / n_mer) * 0.5)
    z = numpy.sin(n_long * mu / n_mer) * 0.5

    mlab.plot3d(x, y, z, numpy.sin(mu), tube_radius=0.025, colormap='Spectral')
    mlab.show()
75 | 
76 | 
77 | if __name__ == '__main__':
78 |     # surface_spherical_harmonic()
79 |     # surface_3d()
80 |     test_plot3d()
81 | 


--------------------------------------------------------------------------------
/data_analysis/study_numpy/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # @Time    : 2017/7/23 12:09
4 | # @Author  : Tom.lee
5 | # @Site    : 
6 | # @File    : __init__.py.py
7 | # @Software: PyCharm
8 | 
9 | 


--------------------------------------------------------------------------------
/data_analysis/study_numpy/_test.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # @Time    : 2017/9/5 22:35
 4 | # @Author  : Tom.lee
 5 | # @Site    : 
 6 | # @File    : _test.py
 7 | # @Software: PyCharm
 8 | 
 9 | # import numpy as np
10 | 
11 | 


--------------------------------------------------------------------------------
/data_analysis/study_numpy/numpy_functions/np_arange.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # @Time           : 17-9-7 下午3:10
 4 | # @Author         : Tom.Lee
 5 | # @File           : np_arange.py
 6 | # @Product        : PyCharm
 7 | # @Docs           : 
 8 | # @Source         : 
 9 | 
10 | import numpy as np
11 | 
12 | 
13 | # 四维数组
14 | t = np.arange(3 * 4 * 5 * 6).reshape((3, 4, 5, 6))
15 | print len(t), len(t[0]), len(t[0][0]), len(t[0][0][0])
16 | 
17 | s = np.arange(3 * 4 * 5 * 6)[::-1].reshape((5, 4, 6, 3))
18 | print len(s), len(s[0]), len(s[0][0]), len(s[0][0][0])
19 | 


--------------------------------------------------------------------------------
/data_analysis/study_numpy/numpy_functions/np_dot.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | # @Time           : 17-9-7 下午3:07
  4 | # @Author         : Tom.Lee
  5 | # @File           : np_dot.py
  6 | # @Product        : PyCharm
  7 | # @Docs           : 
  8 | # @Source         : 
  9 | import numpy as np
 10 | 
 11 | """
 12 | >>> import numpy as np
 13 | Examples
 14 | --------
 15 | 
 16 | >>> np.random.rand(3,2)
 17 | array([[ 0.14022471,  0.96360618],  #random
 18 |        [ 0.37601032,  0.25528411],  #random
 19 |        [ 0.49313049,  0.94909878]]) #random
 20 | 
 21 | 
 22 | >>> np.dot(3, 4)
 23 | 12
 24 | 
 25 | Neither argument is complex-conjugated:
 26 | 
 27 | >>> np.dot([2j, 3j], [2j, 3j])
 28 | (-13+0j)
 29 | 
 30 | For 2-D arrays it is the matrix product:
 31 | 
 32 | >>> a = [[1, 0], [0, 1]]
 33 | >>> b = [[4, 1], [2, 2]]
 34 | >>> np.dot(a, b)
 35 | array([[4, 1],
 36 |        [2, 2]])
 37 | 
 38 | >>> a = np.arange(3*4*5*6).reshape((3,4,5,6))
 39 | >>> b = np.arange(3*4*5*6)[::-1].reshape((5,4,6,3))
 40 | >>> np.dot(a, b)[2,3,2,1,2,2]
 41 | 499128
 42 | >>> sum(a[2,3,2,:] * b[1,2,:,2])
 43 | 499128
 44 | 
 45 | """
 46 | 
 47 | # ############################### 一维 ###############################
 48 | """
 49 | 参数个数相同：
 50 | """
 51 | 
 52 | print np.dot(3, 4)  # 3*4 -> 12
 53 | print np.dot([1, 2, 3], [4, 5, 6])  # 1 * 4 + 2 * 5 + 3 * 6 -> 32
 54 | 
 55 | """
 56 | 参数列表不同(短的参数元素个数只能为1,且不能为列表[]类型):
 57 | 如：
 58 | >>> np.dot([1, 2, 3], [4, 5])
 59 | ValueError: shapes (3,) and (2,) not aligned: 3 (dim 0) != 2 (dim 0)
 60 | 
 61 | >>> np.dot([1, 2, 3], [4])　
 62 | ValueError: shapes (3,) and (1,) not aligned: 3 (dim 0) != 1 (dim 0)
 63 | 
 64 | >>> np.dot([1, 2, 3], 4)
 65 | [ 4  8 12]
 66 | 
 67 | """
 68 | print np.dot([1, 2, 3], 4)  # [1*4,2*4,3*4] -> [ 4  8 12]
 69 | 
 70 | 
 71 | # ############################### 二维 ###############################
 72 | """
 73 | 参数个数相同：
 74 | 
 75 | 计算过程:
 76 | 
 77 | 第一轮:
 78 |     1. A中取第一个元素[x1, y1]
 79 |        B中取各个元素中的第一个值[m1, m2]
 80 |        矩阵相乘-> x1*m1+y1*m2
 81 | 
 82 |     2. A中取第一个元素[x1, y1]
 83 |        B中取各个元素中的第二个值[n1, n2]
 84 |        矩阵相乘-> x1*n1+y1*n2
 85 | --> [[ 77 110]]
 86 | 第二轮:
 87 |     1. A中取第二个元素[x2, y2]
 88 |        B中取各个元素中的第一个值[m1, m2]
 89 |        矩阵相乘-> x2*m1+y2*m2
 90 | 
 91 |     2. A中取第二个元素[x2, y2]
 92 |        B中取各个元素中的第二个值[n1, n2]
 93 |        矩阵相乘-> x2*n1+y2*n2
 94 | --> [[ 77 110] [165 242]]
 95 | 
 96 | 
 97 | """
 98 | 
 99 | x1, y1 = 1, 2
100 | x2, y2 = 3, 4
101 | 
102 | m1, n1 = 11, 22
103 | m2, n2 = 33, 44
104 | 
105 | A = [[x1, y1], [x2, y2]]  # 行
106 | B = [[m1, n1], [m2, n2]]  # 列
107 | 
108 | print np.dot(A, B)
109 | # [[ 77 110]
110 | #  [165 242]]
111 | 
112 | print '测试计算过程:'
113 | print x1 * m1 + y1 * m2, x1 * n1 + y1 * n2  # 77 110
114 | print x2 * m1 + y2 * m2, x2 * n1 + y2 * n2  # 165 242
115 | 
116 | 
def my_dot_w2(a, b):
    """Pure-Python imitation of ``numpy.dot`` for ints and lists up to 2-D.

    Supported combinations:

    * int . int            -> product
    * list . int (or int . list) -> every element scaled
    * 1-D list . 1-D list  -> inner product
    * 2-D list . 2-D list  -> matrix product (rows of ``a`` by columns of ``b``)

    :param a: int, flat list, or list of row lists.
    :param b: int, flat list, or list of row lists.
    :return: int for scalar results, otherwise a (nested) list.
    :raises AssertionError: if the inner dimensions do not match.
    :raises Exception: if an argument is neither a list nor an int.
    """
    if isinstance(a, list) and isinstance(b, list):
        if a and isinstance(a[0], list):  # matrix product
            # The result has one column per column of b, which is not
            # len(a) unless the matrices are square.
            columns = len(b[0]) if b else 0
            return [
                [my_dot_w2(row, [b_row[j] for b_row in b])
                 for j in range(columns)]
                for row in a
            ]

        # 1-D inner product
        assert len(a) == len(b)
        return sum(x * y for x, y in zip(a, b))

    # A number combined with a list scales every element
    elif isinstance(a, list) and isinstance(b, int):
        return [x * b for x in a]

    elif isinstance(b, list) and isinstance(a, int):
        return [x * a for x in b]

    # Both operands are numbers
    elif isinstance(a, int) and isinstance(b, int):
        return a * b

    # Anything else is rejected
    else:
        raise Exception('params must be "list or int"!')
153 | 
154 | 
155 | print '**' * 50
156 | print my_dot_w2([1, 2], 3)  # 1*3,2*3 = [3, 6]
157 | print np.dot([1, 2], 3)
158 | 
159 | print my_dot_w2(3, [1, 2])  # 3*1,3*2 = [3, 6]
160 | print np.dot(3, [1, 2])
161 | 
162 | print my_dot_w2([1, 2], [3, 4])  # 1*3+2*4 = 11
163 | print np.dot([1, 2], [3, 4])
164 | 
165 | print my_dot_w2(A, B)
166 | print np.dot(A, B)
167 | 
168 | 
169 | 
170 | 
171 | 
172 | 


--------------------------------------------------------------------------------
/data_analysis/study_numpy/numpy_functions/np_mgrid_ogrid.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # @Time           : 17-9-7 下午3:15
 4 | # @Author         : Tom.Lee
 5 | # @CopyRight      : 2016-2017 OpenBridge by yihecloud
 6 | # @File           : np_mgrid_ogrid.py
 7 | # @Product        : PyCharm
 8 | # @Docs           : 
 9 | # @Source         : 
10 | 
11 | 
12 | # #创建网格索引
13 | """
14 | >>> import numpy as np
15 | 
16 | 
17 | # 密集网格np.mgrid
18 | >>> mgrid = np.lib.index_tricks.nd_grid()
19 | >>> mgrid[0:5,0:5]
20 | array([[[0, 0, 0, 0, 0],
21 |         [1, 1, 1, 1, 1],
22 |         [2, 2, 2, 2, 2],
23 |         [3, 3, 3, 3, 3],
24 |         [4, 4, 4, 4, 4]],
25 |        [[0, 1, 2, 3, 4],
26 |         [0, 1, 2, 3, 4],
27 |         [0, 1, 2, 3, 4],
28 |         [0, 1, 2, 3, 4],
29 |         [0, 1, 2, 3, 4]]])
30 | >>> mgrid[-1:1:5j]
31 | array([-1. , -0.5,  0. ,  0.5,  1. ])
32 | 
33 | 
34 | # 稀疏网格np.ogrid
35 | >>> ogrid = np.lib.index_tricks.nd_grid(sparse=True)
36 | >>> ogrid[0:5,0:5]
37 | [array([[0],
38 |         [1],
39 |         [2],
40 |         [3],
41 |         [4]]), array([[0, 1, 2, 3, 4]])]
42 | 
43 | 
44 | """
45 | 


--------------------------------------------------------------------------------
/data_analysis/study_numpy/numpy_functions/np_random.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # @Time           : 17-9-7 下午3:12
 4 | # @Author         : Tom.Lee
 5 | # @CopyRight      : 2016-2017 OpenBridge by yihecloud
 6 | # @File           : np_random.py
 7 | # @Product        : PyCharm
 8 | # @Docs           : 
 9 | # @Source         : 
10 | 
11 | """
12 | >>> import numpy as np
13 | 
14 | 
15 | # 创建随机二维数组
16 | >>> np.random.rand(3,2)
17 | array([[ 0.14022471,  0.96360618],  #random
18 |        [ 0.37601032,  0.25528411],  #random
19 |        [ 0.49313049,  0.94909878]]) #random
20 | 
21 | """
22 | 


--------------------------------------------------------------------------------
/data_analysis/study_numpy/numpy_multidimensional.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # @Time    : 2017/7/23 13:12
 4 | # @Author  : Tom.lee
 5 | # @Site    : 
 6 | # @File    : numpy_list_multidimensional.py
 7 | # @Software: PyCharm
 8 | 
 9 | """
10 | numpy 多维数组
11 | """
12 | import numpy as np
13 | 
14 | 
def split_line():
    """Print a separator line made of 36 asterisks."""
    separator = '*' * (6 ** 2)
    print(separator)
17 | 
18 | 
19 | a = np.arange(10, 0, -1)
20 | print a
21 | b = np.arange(100, 200, 10)
22 | print b
23 | split_line()
24 | 
25 | 
def multi_2():
    """
    Build a 2-D array from the module-level 1-D arrays ``a`` and ``b``.

    ``a`` is reshaped into a single column, so adding ``b`` to it broadcasts
    to a table with one row per element of ``a`` and one column per element
    of ``b``.
    :return: 
    """
    y = a.reshape(-1, 1)  # column vector: the y axis
    x = b  # the x axis
    xy = y + x  # every y element added to every element of x
    print xy

    print '下标（5,5）：', xy[5, 5]
    print xy.shape
    split_line()
41 | 
42 | 
def multi_2_func():
    """
    Create a 2-D array from a function of the indices.

    Prints the 10 x 5 array whose element at (x, y) is ``(x + 1) * y``.
    :return: 
    """
    print np.fromfunction(lambda x, y: (x + 1) * y, (10, 5))
    split_line()
50 | 
51 | 
def sin():
    """
    Demonstrate ``np.sin`` with and without an output array.
    :return: 
    """
    x = np.linspace(0, 2 * np.pi, 10)

    # np.sin(x) computes the sine of every element of x; x itself is unchanged
    y = np.sin(x)
    print x, '\n', y
    split_line()

    # np.sin(x, x) passes x as the output array as well: the sines overwrite
    # x, and z refers to that same memory
    z = np.sin(x, x)
    print x, '\n', z
    split_line()
68 | 
69 | 
70 | if __name__ == '__main__':
71 |     # multi_2()
72 |     # multi_2_func()
73 |     sin()
74 | 


--------------------------------------------------------------------------------
/data_analysis/study_numpy/numpy_ndarray.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | # @Time    : 17-7-22 上午12:41
  4 | # @Author  : tom.lee
  5 | # @File    : study_numpy.py
  6 | # @Software: PyCharm
  7 | 
  8 | """
  9 | numpy
 10 | Numpy是Python的一个科学计算的库，提供了矩阵运算的功能
 11 | """
 12 | 
 13 | import numpy as np
 14 | 
 15 | 
 16 | def split_line():
 17 |     print '*' * 6 ** 2
 18 | 
 19 | 
def np_version():
    """
    Print the installed numpy version string.
    :return:
    """
    print np.version.version
 26 | 
 27 | 
def np_list():
    """
    numpy arrays:

    hold a single element type,
    are created with "numpy.array()",
    and take "dtype=numpy.<type>" to set the element type explicitly.

    :return:
    """
    # Creation
    l = np.array([1, 2, 3], dtype=np.int8)
    a = np.array([1, 2, 3, 4])
    b = np.array((5, 6, 7, 8))
    c = np.array([[1, 2, 3, 4], [4, 5, 6, 7], [7, 8, 9, 10]])
    print 'l:', l
    print 'a:', a
    print 'b:', b
    print 'c:', c
    split_line()

    # Element types
    print l.dtype, c.dtype
    split_line()

    # Size: the shape of l has one entry, so it is one-dimensional.
    #       The shape of c has two entries, so it is two-dimensional:
    #       axis 0 has length 3 and axis 1 has length 4.
    print l.shape, c.shape
    split_line()

    # Assigning to shape changes the length of each axis in place; the
    # elements keep their positions in memory.
    c.shape = 4, 3
    print c
    split_line()

    # An axis given as -1 is computed from the element count, so this
    # turns the shape of c into (2, 6).
    c.shape = 2, -1
    print c
    split_line()

    # reshape returns an array with the new shape and leaves the shape of
    # the original unchanged.
    # Note that a and d then share the same data memory.
    d = a.reshape((2, 2))
    print 'a:', a
    print 'd:', d
    split_line()
 74 | 
 75 | 
 76 | def np_list_create():
 77 |     # 使用xrange创建一维数组 [start,end,步长)包含起始位置,不包含终止位置,
 78 |     # 元素个数: (end-start)/步长
 79 |     np_lst = np.arange(0, 10, 1)
 80 |     print np_lst
 81 |     print '大小:%d' % np_lst.shape
 82 |     split_line()
 83 | 
 84 |     # 等差数列
 85 |     # linspace(strat,end,size), [start,end]包含起始位置和终止位置,一共创建size个元素
 86 |     # 可以通过endpoint关键字指定是否包括终值
 87 |     print np.linspace(0, 1, 12)
 88 |     split_line()
 89 | 
 90 |     # 等比数列
 91 |     # logspace(开始指数，结束指数，数量，底数默认10)
 92 |     print np.logspace(0, 2, 20)
 93 |     split_line()
 94 | 
 95 | 
 96 | def np_list_by_byte():
 97 |     """
 98 |     使用frombuffer, fromstring, fromfile等函数可以从字节序列创建数组
 99 |     使用时一定要传入dtype参数
100 |     
101 |     Python的字符串实际上是字节序列，每个字符占一个字节，
102 |     因此如果从字符串s创建一个8bit的整数数组的话，所得到的数组正好就是字符串中每个字符的ASCII编码
103 |     :return: 
104 |     """
105 |     s = 'abcdefg'
106 |     print np.frombuffer(s, dtype=np.int8)
107 |     split_line()
108 | 
109 |     print np.fromstring(s, dtype=np.int8)
110 |     split_line()
111 | 
112 |     # 如果从字符串s创建16bit的整数数组，那么两个相邻的字节就表示一个整数，
113 |     # 把字节98和字节97当作一个16位的整数， 它的值就是98*256+97 = 25185。
114 |     # 可以看出内存中是以little endian(低位字节在前)方式保存数据的。
115 |     # 所以字符串的长度必须是偶数
116 |     print np.fromstring('abcdefgh', dtype=np.int16)
117 |     split_line()
118 | 
119 | 
def np_list_by_func():
    """Create arrays from a function of the element indices.

    :return: None
    """
    # fromfunction takes a callable and an iterable (tuple or list) giving
    # the size of every dimension:
    #   (10,)  -> 1-D array with 10 elements; the callable gets one argument
    #   (5, 6) -> 2-D array of 5 x 6 elements; the callable gets two
    print(np.fromfunction(lambda x: x + 1, (10,)))
    print(np.fromfunction(lambda x, y: (x + 1) * (y + 1), (5, 6)))
    split_line()
131 | 
132 | 
def np_list_opt():
    """Show element access, fancy indexing and boolean filtering.

    Basic operations on a 1-D NumPy array mirror those of a Python list.

    :return: None
    """
    values = np.arange(10, 1, -1)
    print(values)
    # The first label read "做小值" in the original; corrected to "最小值"
    # (minimum) to match the "最大值" (maximum) label below.
    print('%s %s' % ('最小值：', values.min()))
    print('%s %s' % ('最大值：', values.max()))
    print('%s %s' % ('下标0的元素：', values[0]))
    split_line()

    # Indexing with an integer array or list ("fancy indexing") returns a
    # copy that does not share memory with the source array.
    print(values[np.array([1, 5, 3])])  # index with an array
    print(values[[1, 5, 3]])  # index with a list
    split_line()

    # Boolean filtering.
    print(values[values > 3])  # elements greater than 3
    print(values > 3)  # element-wise comparison, a boolean array
    split_line()
154 | 
155 | 
if __name__ == '__main__':
    # Uncomment a line to run the corresponding demo.
    # np_version()
    # np_list()
    # np_list_create()
    # np_list_by_byte()
    # np_list_by_func()
    # np_list_opt()
    print(np.fromfunction(lambda x: x, (10,)))
164 | 


--------------------------------------------------------------------------------
/data_analysis/study_numpy/numpy_polynomial_poly1d.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # @Time    : 2017/7/30 13:44
 4 | # @Author  : Tom.lee
 5 | # @File    : numpy_polynomial_poly1d.py
 6 | # @Software: PyCharm
 7 | 
 8 | """
 9 | 多项式
10 | """
11 | import numpy as np
12 | 
13 | # 构造多项式
# Build polynomials from coefficient lists (highest power first).
p1 = np.poly1d([1])  # 1
p2 = np.poly1d([1, 2])  # x + 2
p3 = np.poly1d([1, 2, 3])  # x^2 + 2x + 3
p4 = np.poly1d([1, 2, 3, 4])  # x^3 + 2* x^2 + 3x + 4
# '%s %s' keeps the "label value" layout of the Python 2 print statement
# while staying valid on Python 3.
print('%s %s' % ('\np1:', p1))
print('%s %s' % ('\np2:', p2))
print('%s %s' % ('\np3:', p3))
print('%s %s' % ('\np4:', p4))
# Evaluate each polynomial at x = 0.5 next to the hand-computed value.
print('\n\n求函数0.5处的值：')
print('%s %s' % (p1(0.5), 1))
print('%s %s' % (p2(0.5), 0.5 + 2))
print('%s %s' % (p3(0.5), 0.5 ** 2 + 2 * 0.5 + 3))
print('%s %s' % (p4(0.5), 0.5 ** 3 + 2 * 0.5 ** 2 + 3 * 0.5 + 4))
# Roots.
print('\n\n解：')
print(p1.r)
print(p2.r)
print(p3.r)
print(p4.r)


print('%s %s %s' % ("*" * 20, 'Y = X + 1', "*" * 20))
x = np.linspace(0, 1, 10)  # sample points
# Vectorized y = x + 1. np.array(map(...)) would wrap the lazy map object
# in a 0-d object array on Python 3 and break polyfit.
y = x + 1
m = np.polyfit(x, y, 2)  # least-squares coefficients of a degree-2 fit
y1 = np.poly1d(m)  # polynomial built from the fitted coefficients
print('%s %s' % ("\nx取值：", x))
print('%s %s' % ("\ny取值：", y))
print('%s %s' % ("\n多项式参数：", m))
print('%s %s' % ("\n一维多项式：", y1))
print('%s %s' % ("\n使用一维多项式计算y值：", y1(x)))
print('%s %s' % ("\n计算一维多项式的解：", y1.r))
print('%s %s %s %s %s' % (
    "\n比较多项式与函数计算的结果：", "多项式：", y1(120), " 函数：", 120 + 1))
48 | 


--------------------------------------------------------------------------------
/data_analysis/study_numpy/numpy_ufunc.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # @Time    : 2017/7/23 21:51
 4 | # @Author  : Tom.lee
 5 | # @Site    : 
 6 | # @File    : numpy_ufunc.py
 7 | # @Software: PyCharm
 8 | 
 9 | """
10 | ufunc是universal function的缩写，它是一种能对数组的每个元素进行操作的函数。
11 | NumPy内置的许多ufunc函数都是在C语言级别实现的，因此它们的计算速度非常快
12 | """
13 | import numpy as np
14 | 
15 | 
def foo(x):
    """Evaluate a periodic triangle pulse with period 1 at scalar ``x``.

    Within one period the function rises linearly from 0 to ``hc`` = 1.0
    on [0, 0.4], falls linearly back to 0 on (0.4, 0.6) and is 0 on
    [0.6, 1).

    :param x: a real scalar; any value is first reduced into [0, 1)
    :return: the function value as a float
    """
    # Three linear segments of the form y = k * x + b with b = 0.
    c0, hc, c = 0.4, 1.0, 0.6
    # Always reduce into one period. The original only did so for x > 1, so
    # negative inputs fell through to the rising segment and produced
    # negative values. Python's % returns a non-negative result for a
    # positive modulus, so negative x is handled as well.
    x = float(x) % 1.0
    if x <= c0:
        k = hc / c0
        return k * x
    elif x < c:
        k = (hc - 0) / (c0 - c)
        return k * (x - c)
    else:
        # Float zero so every branch returns the same type.
        return 0.0
30 | 
31 | 
# Sample foo at 20 evenly spaced points on [0, 2] and print inputs/outputs.
X = np.linspace(0, 2, 20)
# A list comprehension is eager on both Python 2 and 3; np.array(map(...))
# would wrap the lazy map object itself on Python 3.
Y = np.array([foo(value) for value in X])
print(X)
print(Y)
36 | 


--------------------------------------------------------------------------------
/data_analysis/study_numpy/png/numpy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tomoncle/Python-notes/ce675486290c3d1c7c2e4890b57e3d0c8a1228cc/data_analysis/study_numpy/png/numpy.png


--------------------------------------------------------------------------------
/data_analysis/study_tesseract/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # @Time           : 17-8-7 下午12:52
4 | # @Author         : Tom.Lee
5 | # @CopyRight      : 2016-2017 OpenBridge by yihecloud
6 | # @File           : __init__.py.py
7 | # @Product        : PyCharm
8 | 


--------------------------------------------------------------------------------
/data_analysis/study_tesseract/image/20170807142300.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tomoncle/Python-notes/ce675486290c3d1c7c2e4890b57e3d0c8a1228cc/data_analysis/study_tesseract/image/20170807142300.png


--------------------------------------------------------------------------------
/data_analysis/study_tesseract/test01.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # @Time           : 17-8-7 下午1:21
 4 | # @Author         : Tom.Lee
 5 | # @CopyRight      : 2016-2017 OpenBridge by yihecloud
 6 | # @File           : test01.py
 7 | # @Product        : PyCharm
 8 | 
 9 | """
10 | # BASH
11 | 
12 | $ tesseract image_path out
13 | $ cat out.txt
14 | """
15 | import Image
16 | 
17 | import pytesseract
18 | 
# Run OCR on the sample image and print the recognised text.
# NOTE(review): the path is relative, so this presumably has to be run from
# the directory containing ./image - confirm.
file_obj = Image.open('./image/20170807142300.png')
print(pytesseract.image_to_string(file_obj))
21 | 


--------------------------------------------------------------------------------
/data_structure.md:
--------------------------------------------------------------------------------
 1 | # 数据结构
 2 | 查看python对象在内存的地址可以使用内置的`id()`方法来查看．例如`id(3)`
 3 | 
 4 | ## 基本数据类型
 5 | 
 6 | ## 引用数据类型
 7 | 
 8 | ## 可变对象
 9 | 
10 | ## 不可变对象
11 | 


--------------------------------------------------------------------------------
/decorator.md:
--------------------------------------------------------------------------------
  1 | # decorator装饰器
  2 | 
  3 | 装饰器本质上是一个Python函数，它可以让其他函数在不需要做任何代码变动的前提下增加额外功能，
  4 | 装饰器的返回值也是一个函数对象。
  5 | 它经常用于有切面需求的场景，比如：插入日志、性能测试、事务处理、缓存、权限校验等场景。
  6 | 概括的讲，装饰器的作用就是为已经存在的对象添加额外的功能。
  7 | 
  8 | # 函数装饰器
  9 | ### 简单的装饰器
 10 | ```python
 11 | def decorator(func):
 12 |     """
 13 |     func 即为调用该函数的方法
 14 |     """
 15 |     def wrapper(*args, **kwargs):
 16 |         print '方法%s调用装饰器' % func.__name__
 17 |         return func(*args, **kwargs)
 18 | 
 19 |     return wrapper
 20 | 
 21 | 
 22 | @decorator
 23 | def show():
 24 |     """
 25 |     @符号是装饰器的语法糖,在定义函数的时候使用，避免再一次赋值操作
 26 |     使用＠语法糖，等价于 show=decorator(show)
 27 |     """
 28 |     print '......show......'
 29 | 
 30 | 
 31 | 
 32 | show()
 33 | 
 34 | ```
 35 | ### 带参数的装饰器
 36 | ```python
 37 | def logging(level):
 38 |     """
 39 |     对简单装饰器的一次封装，返回一个新的装饰器,传递了level参数
 40 |     """
 41 | 
 42 |     def decorator(func):
 43 |         def wrapper(*args, **kwargs):
 44 |             if level == "warn":
 45 |                 print ("%s is running" % func.__name__)
 46 |             return func(*args, **kwargs)
 47 | 
 48 |         return wrapper
 49 | 
 50 |     return decorator
 51 | 
 52 | 
 53 | @logging(level="warn")
 54 | def hell(name='foo'):
 55 |     print("i am %s" % name)
 56 | 
 57 | 
 58 | hell()
 59 | ```
 60 | # 类装饰器
 61 | 相比函数装饰器，类装饰器具有灵活度大、高内聚、封装性等优点。
 62 | 使用类装饰器还可以依靠类内部的`__call__`方法，
 63 | 当使用`@`形式将装饰器附加到函数上时，就会调用此方法。
 64 | 
 65 | ```python
 66 | class Logging(object):
 67 |     def __init__(self, func):
 68 |         self._func = func
 69 | 
 70 |     def __call__(self):
 71 |         print ('Logging starting')
 72 |         self._func()
 73 |         print ('Logging ending')
 74 | 
 75 | 
 76 | @Logging
 77 | def tes():
 78 |     print ('bar')
 79 | 
 80 | 
 81 | tes()
 82 | ```
 83 | 
 84 | # 装饰器缺点
 85 | ### 举例
 86 | 使用装饰器极大地复用了代码，但是他有一个缺点就是"原函数的元信息"不见了
 87 | ```python
 88 | def decorator(func):
 89 |     def wrapper(*args, **kwargs):
 90 |         print func.__name__, func.__doc__, 'call decorator'
 91 |         return func(*args, **kwargs)
 92 | 
 93 |     return wrapper
 94 | 
 95 | 
 96 | @decorator
 97 | def show():
 98 |     """ show test """
 99 |     print '......'
100 | 
101 | 
102 | show()
103 | print show.__name__  # wrapper
104 | print show.__doc__  # None
105 | ```
106 | ### 改进装饰器
107 | 使用`functools.wraps`装饰器,它能把原函数的元信息拷贝到装饰器函数中，
108 | 这使得装饰器函数也有和原函数一样的元信息了。
109 | ```python
110 | from functools import wraps
111 | 
112 | def decorator(func):
113 |     @wraps(func)
114 |     def wrapper(*args, **kwargs):
115 |         print func.__name__, func.__doc__, 'call decorator'
116 |         return func(*args, **kwargs)
117 | 
118 |     return wrapper
119 | 
120 | 
121 | @decorator
122 | def show():
123 |     """ show test """
124 |     print '......'
125 | 
126 | 
127 | show()
128 | print show.__name__  # show
129 | print show.__doc__  # show test
130 | 
131 | ```
132 | 
133 | 
134 | # 内置装饰器
135 | `@staticmethod`、`@classmethod`、`@property`
136 | 
137 | 
138 | # 装饰器的顺序
139 | ```python
140 | @a
141 | @b
142 | @c
143 | def f ():
144 |     pass
145 | # 等效于
146 | f = a(b(c(f)))
147 | ```
148 | 
149 | 
150 | 


--------------------------------------------------------------------------------
/dict.md:
--------------------------------------------------------------------------------
  1 | # python 字典dict
  2 | 无序，k-v对存在，查找速度快，占用内存高，key是唯一值，不能重复。
  3 | 在python终端，使用`dir(dict)`获取方法及属性列表，使用`help(dict)`获取其使用方法
  4 | 
  5 | # 基本操作
  6 | ### 声明
  7 | * `声明`：`dict={}`
  8 | * `声明并赋值`：`dict={'key':'value','num':1,'list':[1,2],'tup':(1,2,3)}`
  9 | 
 10 | ### 添加或修改
 11 | * `dict['key'] = 'value'`：使用dict['key']=value的方式为字典重新赋值，或添加元素
 12 | 
 13 | ### 删除
 14 | * `d.pop('k')` : 删除字典指定的k,并且返回该k的值
 15 | * `del d['k']` : 删除字典的指定k
 16 | * `del dict`   : 删除整个字典
 17 | 
 18 | ### 取值
 19 | * `dict['k']`     : 当dict不存在k时，抛出异常
 20 | * `dict.get('k')` : 当dict不存在k时，返回None
 21 | 
 22 | ### 遍历
 23 | * 获取keys列表：`dict.keys()`
 24 | * 获取值列表  ：`dict.values()`
 25 | * 获取(k,v)元组列表：`dict.items()`
 26 | * 获取以上列表的可迭代对象，需要使用`dict.iterkeys(); dict.itervalues(); dict.iteritems()`
 27 | ```python
 28 | for k,v in dict.items():
 29 |     print 'key:',k," -value:",v
 30 | ```
 31 | 
 32 | # 转换
 33 | * str 转 dict : 
 34 |   * `eval()`函数: 使用`eval()`函数可以使字符串转为字典 `eval(str)`
 35 |   * `exec()`函数: 需要声明一个被赋值的变量
 36 | ```python
 37 | >>> s="{'k':1,'w':2}"
 38 | >>> d=None
 39 | >>> exec('d='+s)
 40 | >>> d
 41 | {'k': 1, 'w': 2}
 42 | >>> d['k']
 43 | 1
 44 | >>> 
 45 | ```
 46 |   * json模块：转换带特殊字符的字典
 47 | ```shell
 48 | >>> s='[{"RepoDigests": null,"Created":1466711701,"Size":5042677,"VirtualSize":5042677,"Labels":null}]'
 49 | >>> import json
 50 | >>> print json.loads(s) 
 51 | [{u'Labels': None, u'Size': 5042677, u'RepoDigests': None, u'VirtualSize': 5042677, u'Created': 1466711701}]
 52 | >>> 
 53 | ```
 54 | * dict 转 str : `str(dict)`
 55 | 
 56 | # 技巧
 57 | ### 按顺序获取dict的元素
 58 | ```python
 59 | keys=dict.keys()
 60 | for k in keys.sort():
 61 |     print dict.get('k')
 62 | 
 63 | ```
 64 | 
 65 | ### 字典相加
 66 | ```python
 67 | a={1:1}
 68 | b={2:2}
 69 | 
 70 | c= dict(a,**b) # 返回值为大字典
 71 | a.update(b) # 返回值为None,a为大字典
 72 | ```
 73 | 
 74 | ### 从大字典取出小字典
 75 | ```python
 76 | dic = {'a': 1, 'b': 2, 'c': 3}
 77 | lis = ['a', 'b']
 78 | 
 79 | print dict(zip(lis, map(lambda k: dic.get(k), lis)))
 80 | ```
 81 | 
 82 | ### 两个元组或列表转字典
 83 | ```python
 84 | k = ['a', 'b', 'c']
 85 | v = [1, 2, 3]
 86 | 
 87 | print zip(k, v)
 88 | print dict(zip(k, v))
 89 | 
 90 | ```
 91 | 
 92 | ### 对象列表构造大字典
 93 | ```python
 94 | class E:
 95 |    def __init__(self, k, v):
 96 |         self.k = k
 97 |         self.v = v
 98 | 
 99 | e1 = E('a', 1)
100 | e2 = E('b', 2)
101 | e3 = E('c', 3)
102 | l = [e1, e2, e3]
103 | 
104 | print reduce(lambda o1, o2: dict(o1, **o2),
105 |              map(lambda e: {e.k: e.v}, l))
106 | ```
107 | 


--------------------------------------------------------------------------------
/file.md:
--------------------------------------------------------------------------------
  1 | # 文件处理
  2 | 
  3 | # 读取文件
  4 | 
  5 | ### read()
  6 |  一次性读取，读取大文件时容易内存溢出
  7 | ```python
  8 | def read_file():
  9 |     with open(path, "r") as f:
 10 |         print f.read()   # 一次性读取，容易内存溢出
 11 | 
 12 | ```
 13 | ### readlines()
 14 | 一次性读到列表，读取大文件时容易内存溢出
 15 | ```python
 16 | def read_line_file():
 17 |     with open(path, "r") as f:
 18 |         for line in f.readlines(): # 一次性读取，容易内存溢出
 19 |             print line
 20 | ```
 21 | 
 22 | 
 23 | ### 读取大文件
 24 | 1.使用pythonic方式读取大文件（推荐方法）：
 25 | ```python
 26 | def read_big_file(path):
 27 |     """
 28 |     使用pythonic方式读取大文件
 29 |     :param path:
 30 |     :return:
 31 |     """
 32 |     with open(path) as f:  # 文件对象f当作迭代对象， 系统将自动处理IO缓冲和内存管理
 33 |         for line in f:
 34 |             print line
 35 | 
 36 | ```
 37 | 2.按指定大小读取大文件（建议用于二进制文件；如果按字符串处理，可能出现截取不准确）：
 38 | ```python
 39 | 
 40 | def get_big_file(path, size):
 41 |     with open(path, "r") as f:
 42 |         while True:
 43 |             block = f.read(size)  # 每次读取固定长度到内存缓冲区
 44 |             if block:
 45 |                 yield block
 46 |             else:
 47 |                 return  # 如果读取到文件末尾，则退出
 48 | ```
 49 | 
 50 | # 写入文件
 51 | 
 52 | ### 文件复制
 53 | 使用　pythonic　方式复制文件（推荐方法）:
 54 | ```python
 55 | def copy_file(src, target):
 56 |     start = time.time()
 57 |     dest = open(target, 'wb+')
 58 |     with open(src) as f:  # 文件对象f当作迭代对象， 系统将自动处理IO缓冲和内存管理
 59 |         for line in f:
 60 |             dest.write(line)
 61 |     dest.close()
 62 |     print '时长：', time.time() - start, '秒'
 63 | ```
 64 | 按指定大小复制文件（以二进制方式打开源文件）:
 65 | ```python
 66 | def copy_file_block(src, target, size):
 67 |     start = time.time()
 68 |     dest = open(target, 'wb+')
 69 |     with open(src, "r") as f:
 70 |         while True:
 71 |             block = f.read(size)  # 每次读取固定长度到内存缓冲区
 72 |             if block:
 73 |                 dest.write(block)
 74 |             else:
 75 |                 break  # 如果读取到文件末尾，则退出
 76 |     dest.close()
 77 |     print '时长：', time.time() - start, '秒'
 78 | 
 79 | ```
 80 | 
 81 | # 练习：
 82 | 计算大文件中单词排序个数
 83 | ```python
 84 |     
 85 |     import time
 86 |     
 87 |     start = time.time()
 88 |     dic = {}
 89 |     with open(path) as f:
 90 |         for line in f:
 91 |             lit = line.split(";")
 92 |             for i in lit:
 93 |                 if dic.get(i):
 94 |                     dic[i] = dic.get(i) + 1
 95 |                 else:
 96 |                     dic[i] = 1
 97 |     
 98 |     print '时长：', time.time() - start, '秒'
 99 |     sort = sorted(dic.items(), key=lambda item: item[1], reverse=True)
100 |     for v in sort:
101 |         print v
102 | ```
103 | 
104 | 
105 | 
106 | 


--------------------------------------------------------------------------------
/levenshtein.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # @Time           : 17-7-4 上午11:21
 4 | # @Author         : Tom.Lee
 5 | # @Description    : http://blog.csdn.net/gzlaiyonghao/article/details/1483728 
 6 | # @File           : test.py
 7 | # @Product        : PyCharm
 8 | 
 9 | import re
10 | 
11 | """
12 | Levenshtein Distance 编辑距离算法,计算字符串相似度
13 | 
14 |     比如要计算cafe和coffee的编辑距离。cafe→caffe→coffe→coffee 为3
15 | 先创建一个6×8的表（cafe长度为4，coffee长度为6，各加2）＊代表空白占位符
16 | 
17 | *	*	c	o	f	f	e	e
18 | 
19 | *   0	1	2	3	4	5	6
20 | 
21 | c	1	0	1	2	3	4	5
22 | 
23 | a	2	1	1	2	3	4	5
24 | 
25 | f	3	2	2	1	2	3	4
26 | 
27 | e	4	3	3	2	2	2	3
28 | 
29 | 
30 | 从3,3格开始，开始计算。取以下三个值的最小值：
31 | 1.如果最上方的字符等于最左方的字符，则为左上方的数字。否则为左上方的数字+1。（对于3,3来说为0）
32 | 2.左方数字+1（对于3,3格来说为2）
33 | 3.上方数字+1（对于3,3格来说为2）
34 | """
35 | 
# Two short sample strings.
# NOTE(review): neither name is referenced again in the visible part of this
# file; presumably left over from manual experiments - confirm before removing.
a = 'cafee'
b = 'cof1ee'
38 | 
39 | 
def minimum(a, b):
    """Return the Levenshtein (edit) distance between ``a`` and ``b``.

    The distance is the minimum number of single-element insertions,
    deletions and substitutions needed to turn one sequence into the other.

    :param a: first sequence (e.g. a string)
    :param b: second sequence
    :return: the edit distance as a non-negative int
    """
    # Distances between the empty prefix of b and every prefix of a.
    previous = list(range(len(a) + 1))
    for row, b_item in enumerate(b, 1):
        # Column 0: turning the first `row` items of b into an empty prefix.
        current = [row]
        for col, a_item in enumerate(a, 1):
            from_above = previous[col] + 1
            from_left = current[col - 1] + 1
            from_diagonal = previous[col - 1] + (0 if a_item == b_item else 1)
            current.append(min(from_above, from_left, from_diagonal))
        previous = current
    return previous[-1]
62 | 
63 | 
# Two error messages with the same wording but a different CIDR and a
# different network UUID.
s1 = 'Invalid input for operation: Requested subnet with cidr: 172.16.17.0' \
     '/24 for network: c6aa9c38-ccee-467f-a1e7-c718a33ecc06 overlaps with another subnet.'

s2 = 'Invalid input for operation: Requested subnet with cidr: 192.168.11.0/24' \
     ' for network: 028d91af-b461-4d9d-ab76-da4a8845d3cf overlaps with another subnet.'
69 | 
70 | 
def pop_cidr_uuid(s):
    """Return ``s`` with every ``a.b.c.d/24`` CIDR and every UUID removed.

    Only the ``/24`` prefix length and lowercase hexadecimal UUIDs are
    matched; all other text is left untouched.

    :param s: the text to clean
    :return: the text with the matched substrings replaced by ''
    """
    patterns = (
        # Dotted-quad address followed by "/24", not embedded in a longer
        # run of digits or dots.
        r'(?<![\.\d])(?:\d{1,3}\.){3}\d{1,3}(?![\.\d])/24',
        # 8-4-4-4-12 lowercase hexadecimal UUID.
        r'([a-f\d]{8}-[a-f\d]{4}-[a-f\d]{4}-[a-f\d]{4}-[a-f\d]{12})',
    )
    for pattern in patterns:
        s = re.sub(pattern, '', s, flags=re.S)
    return s
75 | 
76 | 
# The expected message once the CIDR and UUID are stripped; the printed
# distance between it and the cleaned s1 is therefore 0.
s001 = 'Invalid input for operation: Requested subnet with cidr:  for network:  overlaps with another subnet.'
print(minimum(s001, pop_cidr_uuid(s1)))
79 | 


--------------------------------------------------------------------------------
/list.md:
--------------------------------------------------------------------------------
  1 | # list 列表
  2 | 列表list为Python的有序集合，列表的下标从0开始，`list[0]`获取第一个元素；列表支持倒序操作；`list[-1]`获取列表倒数第一个元素。
  3 | 在python终端，使用`dir(list)`获取方法及属性列表，使用`help(list)`获取其使用方法,　可变对象
  4 | 
  5 | # 基本操作
  6 | ### 声明
  7 | 
  8 | ```python
  9 | # 创建一个空列表
 10 | list_val = []
 11 | 
 12 | # 声明并赋值,可以是不同的类型
 13 | list_val = [1,2,2,'abc',[1,2,3],{'k':v}]
 14 | ```
 15 | ### 添加
 16 | * `list.append(val)`追加到列表尾部；
 17 | * `list.insert(index,val)`按下标插入到列表内部任意位置
 18 | 
 19 | ### 删除
 20 | 注意删除元素时，要判断该元素是否存在于列表中`if 5 in list: list.remove(5)` 或者 `if index < len(list): list.pop(index)`
 21 | * `list.remove(val)` 删除列表元素；注意要判断该元素是否存在于列表中`if 5 in list: list.remove(5)`
 22 | * `list.pop()` 删除尾部元素，并返回该元素的值 
 23 | * `list.pop(index)` 删除指定位置的元素，并返回该元素的值 
 24 | * `del list[index]` 删除指定位置的元素，没有返回值
 25 | * `del list` 删除整个列表，没有返回值
 26 | 
 27 | ### 修改
 28 | * `list[index] = new_val` 使用下标更新列表元素
 29 | 
 30 | # 排序
 31 | ### sort()永久排序
 32 | `list.sort()` 使用sort()方法对list列表进行永久排序，没有参数默认根据数字升序，字母顺序排序
 33 | `list.sort(reverse=True)` 使用sort()方法的reverse=True参数对list列表进行永久排序，按与默认顺序相反的顺序排序
 34 | ```python
 35 | list=[1,'a',3,2]
 36 | 
 37 | list.sort()  # 该方法没有返回值 print list.sort() >> None
 38 | print list   # [1,2,3,'a']
 39 | 
 40 | ```
 41 | 
 42 | ### sorted()临时排序
 43 | `sorted()`方法的排序规则与sort()方法相同
 44 | ```python
 45 | list=[1,3,2]
 46 | 
 47 | print sorted(list)  # [1,2,3]
 48 | print list 			# [1,3,2]
 49 | 
 50 | ```
 51 | 
 52 | ### reverse()列表反转
 53 | 使用`reverse()`方法使列表元素顺序发生反转，注意该方法对列表的改变是永久的
 54 | ```python
 55 | list=[1,2,3]
 56 | 
 57 | print list   			# [1,2,3]
 58 | 
 59 | list.reverse()    		# None, 该方法没有返回值
 60 | print list              	# [3,2,1] ,使用`reverse()`方法永久的改变了列表的元素位置
 61 | 
 62 | 
 63 | ```
 64 | ---
 65 | # 确定列表长度
 66 | 使用`len()`函数获取列表长度，如：`list=[1,2,3] ; print len(list) >> 3`
 67 | 
 68 | ---
 69 | # 遍历
 70 | * 带下标遍历:
 71 | ```python
 72 | x = [11, 12, 13, 14, 15]
 73 | for index, value in enumerate(x):
 74 |     print index, ':', value
 75 | ```
 76 | 
 77 | * 简单遍历:
 78 | ```python
 79 | for val in list:
 80 |    print val
 81 | ```
 82 | 
 83 | * 精简操作,使用列表解析式: `[表达式 for v in list]` 返回一个新的列表
 84 | ```python
 85 | num_list = [1, 2, 3] # [1,2,3]
 86 | 
 87 | # 简单的列表解析式
 88 | num_list = [v+1 for v in num_list] # [2,3,4]
 89 | 
 90 | # 嵌套的列表解析式
 91 | ww, ll = ['1', '22', '333', '4444'], []
 92 | for w in ww:
 93 |     for l in w:
 94 |         ll.append(l)
 95 | 
 96 | print ll
 97 | print [l for w in ww for l in w]
 98 | 
 99 | """结构变形
100 | print[l 
101 |       for w in ww 
102 |          for l in w]
103 | 
104 | """
105 | ```
106 | 
107 | ---
108 | # 切片
109 | 注意参数start,end为列表下标：
110 | * `list[start:stop]` # [start,stop)
111 | * `list[:stop]`      # [0,stop)
112 | * `list[:-1]`        #[0,len-1) 即返回列表的前len-1个元素
113 | * `list[start:]`     # [start,len)
114 | * `list[-1:]`        # [len-1,len)
115 | * `list[:]`          # [0,len) ,copy
116 | * `list[::2]`   # 步长为2：从下标0开始，每隔一个元素取一个值，如 [0,1,2,3,4,5] ==> [0,2,4]
117 | * `list[::-1]`  # 步长为-1：从最后一个元素开始向前逐个取值，如 [0,1,2,3,4,5] ==> [5,4,3,2,1,0]，列表反转的技巧
118 | 
119 | ### 拓展切片
120 | **注：list[i:j:stride]：**表示拓展切片，i表示起始索引，j表示终止索引，stride表示步长，stride默认为1,只能为非0整数
121 | * stride正数：切片从左往右切，切出[i:j),然后按照stride的值，进行获取，表示相隔(stride-1)个元素取出一个元素，组成新列表
122 | * stride负数：切片从右往左切，因为从右往左切，所以此时，j为起始索引，ｉ为终止索引，切出[j:i],所以"i的值要大于j",
123 |               注意此时起始下标默认为None,假如起始下标j=0，表示从第二个元素开始，包含终止索引然后按照stride的值，进行获取
124 | 
125 | **举例**：
126 | 
127 | 
128 | 
129 | # 列表的引用
130 | 如果你将列表list赋值给变量a,而将a赋值给b,则a,b两个变量指向的位置是一样的，所以你改变b时，a也会发生相同的变化
131 | ```python
132 | a = [1,2,3]
133 | b = a
134 | 
135 | print a     # [1,2,3]
136 | print b     # [1,2,3]
137 | 
138 | b.append(4) 
139 | print a     # [1,2,3,4]
140 | ```
141 | 如何避免更改原来的列表?
142 | * 列表复制:`new_list = old_list[:]`,但这种方法只适用于简单列表，也就是列表中的元素都是基本类型，
143 | 如果列表元素还存在列表的话，这种方法就不适用了。原因就是，象a[:]这种处理，只是将列表元素的值生成一个新的列表，如果列表元素也是一个列表，如：a=[1,[2]]，那么这种复制对于元素[2]的处理只是复制[2]的引用，而并未生成 [2]的一个新的列表复制
144 | ```
145 | >>> a=[1,[2]]
146 | >>> b=a[:]
147 | >>> b
148 | [1, [2]]
149 | >>> a[1].append(3)
150 | >>> a
151 | [1, [2, 3]]
152 | >>> b
153 | [1, [2, 3]]
154 | ```
155 | * 使用copy模块中的deepcopy函数:
156 | ```
157 | >>> import copy
158 | >>> a=[1,[2]]
159 | >>> b=copy.deepcopy(a)
160 | >>> b
161 | [1, [2]]
162 | >>> a[1].append(3)
163 | >>> a
164 | [1, [2, 3]]
165 | >>> b
166 | [1, [2]]
167 | ```
168 | 
169 | # 转换
170 | “字符串”原理上其实是一个个字符的列表，所以 `str='ab0'; list=list(str) >> ['a','b','0']`,
171 | 所以对字符串进行截取时，可以先转换为列表处理，然后利用`join()`函数再转换为字符串
172 | * str 转 list : `list(str)`
173 | * list 转 str : `''.join(list)`
174 | 
175 | “字典”是成对存在的，所以要转换时，保证列表的对称
176 | * list 转 dict : 
177 | ```
178 | >>> list_k=['a','b','c','d']
179 | >>> list_v=[1,2,3,4]
180 | >>> dict(zip(list_k,list_v))
181 | {'a': 1, 'c': 3, 'b': 2, 'd': 4}
182 | >>> 
183 | ```
184 | 如果列表中元素为 元素个数为2的列表，可用直接使用dict(list)来转换，如`dict([['a',1],['b',2]])`
185 | 
186 | * dict 转 list :
187 | ```
188 | >>> keys=dict.keys()
189 | >>> keys
190 | ['a', 'c', 'b', 'd']
191 | >>> values=dict.values()
192 | >>> values
193 | [1, 3, 2, 4]
194 | >>>
195 | ```
196 | 元组()与set()可用直接使用同名函数，互相转换，如：`list((1,2))， tuple([1,2,3])`
197 | 
198 | 
199 | # 常用函数
200 | ### range()函数
201 | `range()`函数可以返回一个数值列表：`range(5)`返回`[0, 1, 2, 3, 4]`注意第一个元素是从0开始，
202 | 在python 终端，你可以使用`help(range)`查看`range()`函数的使用方法：
203 | ```
204 | range(...)
205 |     返回包含整数的列表。
206 | 
207 |     range(stop) -> 返回一个数值列表，stop指定终止值,但不包含
208 |     range(4) >> [0, 1, 2, 3]. 
209 |     
210 |     range(start, stop[, step]) -> 返回一个数值列表,可用start指定起始值，setp指定步长，当给定步长时，它指定增量（或递减）
211 |     range(i, j)   >> [i, i+1, i+2, ..., j-1]; start (!) 默认从0开始.
212 |     range(1,10,2) >> [1, 3, 5, 7, 9]
213 | 
214 | ```
215 | ### min()函数
216 | `min()`返回一个**可迭代**对象中最小的值或**传入多个参数**的最小参数，`help(min)`查看帮助文档
217 | ```
218 | min(...)
219 |     min(iterable[, key=func]) -> value
220 |     min(a, b, c, ...[, key=func]) -> value
221 |     
222 |     With a single iterable argument, return its smallest item.
223 |     With two or more arguments, return the smallest argument.
224 | (END)
225 | 
226 | ```
227 | 举例：
228 | ```
229 | >>> min([1,2,34])
230 | 1
231 | >>> min([1,2,34],0)
232 | 0
233 | >>> 
234 | >>> min(['a',2,34])
235 | 2
236 | 
237 | ```
238 | 
239 | # 技巧
240 | ### list列表相加
241 | ```
242 | list = list1+list2
243 | ```
244 | 
245 | 
246 | 
247 | 
248 | 


--------------------------------------------------------------------------------
/loop.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tomoncle/Python-notes/ce675486290c3d1c7c2e4890b57e3d0c8a1228cc/loop.md


--------------------------------------------------------------------------------
/page_parser/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # @Time           : 17-8-8 下午2:19
4 | # @Author         : Tom.Lee
5 | # @CopyRight      : 2016-2017 OpenBridge by yihecloud
6 | # @File           : __init__.py.py
7 | # @Product        : PyCharm
8 | 


--------------------------------------------------------------------------------
/page_parser/beautifulsoup/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # @Time           : 17-4-20 下午1:53
4 | # @Author         : Tom.Lee
5 | # @Description    : 
6 | # @File           : __init__.py.py
7 | # @Product        : PyCharm
8 | 


--------------------------------------------------------------------------------
/page_parser/beautifulsoup/parser.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | # @Time    : 17-4-19 下午10:10
  4 | # @Author  : tom.lee
  5 | # @Site    : 解析器
  6 | # @File    : parser.py
  7 | # @Software: PyCharm
  8 | 
  9 | """
 10 | 支持多级条件查询：
 11 | """
 12 | 
 13 | import urlparse
 14 | 
 15 | import bs4
 16 | 
 17 | 
 18 | class HtmlParser(object):
 19 |     """
 20 |     网页解析器，可以继承此类，实现更复杂功能
 21 |     """
 22 |     url_filed = 'href'
 23 |     parser = 'html.parser'
 24 |     encoding = 'utf-8'
 25 | 
 26 |     def __init__(self, base_url=None):
 27 |         self.__base_url = base_url
 28 | 
 29 |     def simple_tags(self, data, tag=None, patterns=None, attributes=None):
 30 |         """
 31 |         单个标签解析
 32 |         """
 33 |         tags = self.__parser_tags(data, tag, patterns)
 34 |         return self.__tags(tags, attributes)
 35 | 
 36 |     def multilevel_tags(self, data, multilevel_patterns=None, attributes=None):
 37 |         """
 38 |         多标签解析
 39 |         examples : div .a
 40 |         """
 41 |         if not multilevel_patterns:
 42 |             return data
 43 | 
 44 |         for tag_patterns in multilevel_patterns:
 45 |             tag, patterns = tag_patterns.items()[0]
 46 |             data = self.__parser_tags(data, tag, patterns)
 47 |             multilevel_patterns.remove(tag_patterns)
 48 | 
 49 |             if not multilevel_patterns:
 50 |                 return self.__tags(data, attributes)
 51 | 
 52 |             return self.multilevel_tags(data, multilevel_patterns, attributes)
 53 | 
 54 |     def element(self, data, tag=None, patterns=None):
 55 |         """
 56 |         查询符合条件的第一个标签元素
 57 |         """
 58 |         elements = self.elements(data, tag, patterns)
 59 |         return elements[0] if elements else None
 60 | 
 61 |     def elements(self, data, tag=None, patterns=None):
 62 |         """
 63 |         元素集合
 64 |         """
 65 |         return self.__parser_tags(data, tag, patterns)
 66 | 
 67 |     def __tags(self, data, attributes=None):
 68 |         """
 69 |         标签列表
 70 |         """
 71 |         tags = [dict(tag_.attrs, text=tag_.getText()) for tag_ in data]
 72 | 
 73 |         if not attributes:
 74 |             return tags
 75 | 
 76 |         for tag_attr in tags:
 77 |             for k, v in tag_attr.items():
 78 |                 if k in attributes:
 79 |                     continue
 80 |                 tag_attr.pop(k)
 81 | 
 82 |         if self.__base_url:
 83 |             return self.__format_url(tags)
 84 | 
 85 |         return tags
 86 | 
 87 |     def __parser_tags(self, data, tag=None, patterns=None):
 88 |         """
 89 |         返回查询对象列表
 90 |         """
 91 |         return self.__data_parser(data).find_all(tag, patterns)
 92 | 
 93 |     def __data_parser(self, data):
 94 |         if isinstance(data,(str,unicode)):
 95 |             bs4_str = data
 96 |         elif isinstance(data, bs4.element.ResultSet):
 97 |             bs4_str = ' '.join([str(_tag) for _tag in data])
 98 |         else:
 99 |             bs4_str = str(data)
100 |         return bs4.BeautifulSoup(bs4_str, self.parser, from_encoding=self.encoding)
101 | 
102 |     def __format_url(self, maps):
103 |         for m in maps:
104 |             if not m.get(self.url_filed):
105 |                 continue
106 |             m[self.url_filed] = urlparse.urljoin(
107 |                 self.__base_url, m.get(self.url_filed)
108 |             )
109 |         return maps
110 | 
111 | 
if __name__ == '__main__':
    # Demo: print the text of every <a class="sister"> tag in a sample page.
    doc = """
    <html><head><title>The Dormouse's story</title></head>
    <body>
    <p class="title"><b>The Dormouse's story</b></p>
    
    <p class="story">Once upon a time there were three little sisters; and their names were
    <a href="http://example.com/elsie" class="sister" id="link1">Elsie</a>,
    <a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and
    <a href="http://example.com/tillie" class="sister" id="link3">标题</a>;
    and they lived at the bottom of a well.</p>
    
    <p class="story">...</p>
    """
    for s in HtmlParser().multilevel_tags(doc, [{'a': {'class': 'sister'}}]):
        print(s['text'])
128 | 


--------------------------------------------------------------------------------
/page_parser/beautifulsoup/test.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # @Time           : 17-4-20 下午1:53
 4 | # @Author         : Tom.Lee
 5 | # @Description    : 
 6 | # @File           : test.py
 7 | # @Product        : PyCharm
 8 | 
 9 | import re
10 | import urlparse
11 | 
12 | import bs4
13 | 
html_doc = """
<html><head><title>The Dormouse's story</title></head>
<body>
<p class="title"><b>The Dormouse's story</b></p>

<p class="story">Once upon a time there were three little sisters; and their names were
<a href="http://example.com/elsie" class="sister" id="link1">Elsie</a>,
<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and
<a href="http://example.com/tillie" class="sister" id="link3">标题</a>;
and they lived at the bottom of a well.</p>

<p class="story">...</p>
"""
# print html_doc


# Find every <a class="sister"> whose id looks like "link<digits>".
soup = bs4.BeautifulSoup(html_doc, "html.parser", from_encoding='utf-8')
pattern = {'class': 'sister', 'id': re.compile(r'^link\d+$')}
data = soup.find_all('a', pattern)
print(data)

# Attributes of the matched tag objects
first_link, last_link = data[0], data[2]
print(first_link)
print(last_link.getText())
print(first_link.contents)
print(first_link.attrs)


# Alternative lookup through keyword arguments:
# tags_a = soup.find_all(name='a', attrs={'class': 'sister'})
# for a in tags_a:
#     print type(a), a.get('id'), a.get('href'), a.get('no_found')
#     print dict(a.attrs, tag_name=a.getText())

# Serialise the matches, parse the result as a new document and search again.
print(isinstance(data, bs4.element.ResultSet))
bs4_str = ' '.join([str(_tag) for _tag in data])
sp = bs4.BeautifulSoup(bs4_str, "html.parser", from_encoding='utf-8')
data = sp.find_all('a', pattern)
print(data)
print(data[2].getText())
53 | 
54 | 
55 | # 字符串拼接
56 | # print urlparse.urljoin('https://www.baidu.com', '//www.ji.com')


--------------------------------------------------------------------------------
/page_parser/beautifulsoup/test_403.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # @Time           : 17-7-25 下午3:17
 4 | # @Author         : Tom.Lee
 5 | # @CopyRight      : 2016-2017 OpenBridge by yihecloud
 6 | # @File           : test_403.py
 7 | # @Product        : PyCharm
 8 | 
 9 | import bs4
10 | 
t403 = """
<html>
 <head>
  <title>403 Forbidden</title>
 </head>
 <body>
  <h1>403 Forbidden</h1>
  资源 bc6d81de-97af-4ebd-b01a-b23a6567bea2 is protected and cannot be deleted.<br /><br />



 </body>
</html>
"""
soup = bs4.BeautifulSoup(t403, "html.parser", from_encoding='utf-8')

title = soup.find('title')
body = soup.find('body')
title_text = title.getText()

# Body text with the repeated title and all line breaks removed.
body_text = body.getText()
body_text = body_text.replace(title_text, '')
body_text = body_text.replace('\n', '')

# The last word of the title ("Forbidden") becomes the key of the payload.
error_key = title_text.split(' ')[-1]
payload = {'message': body_text, 'code': 1}
print({error_key: payload})
33 | 


--------------------------------------------------------------------------------
/page_parser/xpath/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # @Time    : 17-5-6 下午1:10
4 | # @Author  : tom.lee
5 | # @Site    : 
6 | # @File    : __init__.py.py
7 | # @Software: PyCharm
8 | 


--------------------------------------------------------------------------------
/page_parser/xpath/test.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # @Time    : 17-5-6 下午1:10
 4 | # @Author  : tom.lee
 5 | # @Site    : 
 6 | # @File    : test.py
 7 | # @Software: PyCharm
 8 | 
 9 | 
10 | from lxml import etree
11 | 
# Read the whole page first; the ``with`` block closes the file handle as
# soon as the content is in memory (the original never closed it).
with open('file.txt') as f:
    content = f.read()
selector = etree.HTML(content)

# Each result is the title/description container of one site entry.
divs = selector.xpath('//div[@class="site-item "]/div[@class="title-and-desc"]')
for r in divs:
    item_ = {
        'title': r.xpath('a/div/text()')[0],
        'link': r.xpath('a/@href')[0],
        # Description text without line breaks and surrounding whitespace.
        'desc': r.xpath('div/text()')[0].replace('\n', '').strip(),
    }
    print(item_)
23 | 


--------------------------------------------------------------------------------
/rpc/RPyC/demo.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # @Time           : 17-8-13 下午1:31
 4 | # @Author         : Tom.Lee
 5 | # @CopyRight      : 2016-2017
 6 | # @File           : demo.py
 7 | # @Product        : PyCharm
 8 | # @Docs           : 
 9 | # @Source         : 
10 | 
11 | import rpyc
12 | from rpyc.utils.server import ThreadedServer
13 | 
14 | 
class MyService(rpyc.Service):
    """RPyC service that collects values in a list.

    ``data`` is a class attribute, so the same list object is reached through
    every instance of the service.
    """
    data = []

    def exposed_save_data(self, d):
        """Append ``d`` to the stored values."""
        stored = self.data
        stored.append(d)

    def exposed_get_data(self):
        """Return the list of stored values."""
        stored = self.data
        return stored
23 | 
24 | 
class MyClient(object):
    """Minimal client for the service listening on ``localhost:15111``."""

    @classmethod
    def conn(cls):
        """Connect, store the value ``123`` and print the stored data.

        The connection is closed afterwards, even when one of the remote
        calls fails, so no socket is left open.
        """
        connection = rpyc.connect('localhost', 15111)
        try:
            connection.root.save_data(123)
            print(connection.root.get_data())
        finally:
            connection.close()
31 | 
32 | 
if __name__ == '__main__':
    import threading
    import time

    server = ThreadedServer(MyService, port=15111)
    client = MyClient()

    # Start-up banner, printed line by line before the server begins serving.
    banner = (
        '*************************************',
        '*************************************',
        '*****************RpyC****************',
        '************           **************',
        '*************************************',
        '***************start server**********',
        '*************************************',
        '*************************************\n\n',
    )


    def start():
        """Print the banner, then serve requests (blocks until closed)."""
        for line in banner:
            print(line)
        server.start()

    # The server runs in its own thread so the client below can talk to it.
    worker = threading.Thread(target=start)
    worker.start()

    client.conn()
    time.sleep(5)

    server.close()
    print('service stop.')
59 | 


--------------------------------------------------------------------------------
/rpc/RPyC/tutorials/part01.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # @Time           : 17-8-13 下午3:37
 4 | # @Author         : Tom.Lee
 5 | # @CopyRight      : 2016-2017 OpenBridge by yihecloud
 6 | # @File           : part01.py
 7 | # @Product        : PyCharm
 8 | # @Source         :
 9 | 
10 | """
11 | rpyc 客户端查询服务端信息
12 | 
13 | """
14 | 
15 | import os
16 | 
17 | import rpyc
18 | 
# Print the working directory of this (client) process.
print(os.getcwd())

# Start the built-in classic server before running this script, e.g.:
# os.system('python /usr/local/bin/rpyc_classic.py')
#
# CMD:
#     tom@aric-ThinkPad-E450:~$ python /usr/local/bin/rpyc_classic.py
#     INFO:SLAVE/18812:server started on [0.0.0.0]:18812

# Connect to the server.
conn = rpyc.classic.connect('localhost')

remote_modules = conn.modules
mod1 = remote_modules.sys  # the sys module on the server
mod2 = remote_modules["xml.dom.minidom"]  # the xml.dom.minidom module on the server
print('%s %s' % (mod1, mod2))

# Print the working directory the server was started in.
print(remote_modules.os.getcwd())
39 | 


--------------------------------------------------------------------------------
/rpc/RPyC/tutorials/services/registry_discovery/__init__.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # @Time           : 17-8-15 下午1:27
 4 | # @Author         : Tom.Lee
 5 | # @CopyRight      : 2016-2017 OpenBridge by yihecloud
 6 | # @File           : __init__.py.py
 7 | # @Product        : PyCharm
 8 | # @Docs           : 
 9 | # @Source         : 
10 | 
11 | 
12 | 


--------------------------------------------------------------------------------
/rpc/RPyC/tutorials/services/registry_discovery/client_test.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # @Time           : 17-8-15 下午2:00
 4 | # @Author         : Tom.Lee
 5 | # @CopyRight      : 2016-2017 OpenBridge by yihecloud
 6 | # @File           : client_test.py
 7 | # @Product        : PyCharm
 8 | # @Docs           : 
 9 | # @Source         : 
10 | 
11 | 
12 | import rpyc
13 | from rpyc.utils.registry import UDPRegistryClient
14 | 
15 | 
def service01():
    """Query the service on ``localhost:18861`` and try registry discovery.

    Prints the root object, the service name and aliases, the answer obtained
    through both the short and the ``exposed_`` method name, and the results
    of the two discovery calls.  The connection is always closed at the end.
    """
    conn = rpyc.connect(host='localhost', port=18861)
    try:
        root = conn.root  # MyService object
        print(root)

        print(root.get_service_name())
        print(root.get_service_aliases())

        # custom method
        print(root.get_answer())  # 66
        print(root.exposed_get_answer())  # 66
        # root.get_question() is not exposed:
        # AttributeError: cannot access 'get_question'

        registrar = UDPRegistryClient()
        # Show the lookup result instead of silently discarding it.
        list_of_servers = registrar.discover("foo")
        print(list_of_servers)
        print(rpyc.discover(service_name='MY', host='localhost'))
    finally:
        conn.close()


if __name__ == '__main__':
    service01()
37 | 


--------------------------------------------------------------------------------
/rpc/RPyC/tutorials/services/registry_discovery/service01.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # @Time           : 17-8-15 下午1:35
 4 | # @Author         : Tom.Lee
 5 | # @CopyRight      : 2016-2017 OpenBridge by yihecloud
 6 | # @File           : service.py
 7 | # @Product        : PyCharm
 8 | # @Docs           : 
 9 | # @Source         : 
10 | 
11 | import rpyc
12 | from rpyc.utils.server import ThreadedServer
13 | 
14 | 
15 | 
class MyService(rpyc.Service):
    """Example service with one exposed and one non-exposed class method."""

    def on_connect(self, conn=None):
        """Connection-established hook; nothing to set up.

        ``conn`` is optional so the hook works both when it is called without
        arguments and when the connection object is passed in (as newer RPyC
        releases do).
        """
        pass

    def on_disconnect(self, conn=None):
        """Connection-closed hook; nothing to clean up.

        ``conn`` is optional for the same reason as in ``on_connect``.
        """
        pass

    @classmethod
    def exposed_get_answer(cls):
        """Return the answer ``66``; the ``exposed_`` prefix makes it remotely callable."""
        return 66

    @classmethod
    def get_question(cls):
        """Return the question text; not callable by clients (no ``exposed_`` prefix)."""
        return "what is the airspeed velocity of an unladen swallow?"
31 | 
32 | 
if __name__ == "__main__":
    port = 18861
    t = ThreadedServer(MyService, port=port)
    # Announce the port before start() blocks to serve requests.
    print("""
    service start ok! port {port}
    """.format(port=port))
    t.start()
39 | 


--------------------------------------------------------------------------------
/scheduler_task/study_apscheduler/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # @Time           : 17-8-8 下午2:28
4 | # @Author         : Tom.Lee
5 | # @CopyRight      : 2016-2017 OpenBridge by yihecloud
6 | # @File           : __init__.py.py
7 | # @Product        : PyCharm
8 | 


--------------------------------------------------------------------------------
/scheduler_task/study_apscheduler/examples/demo.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # @Time           : 17-8-13 上午11:33
 4 | # @Author         : Tom.Lee
 5 | # @CopyRight      : 2016-2017 OpenBridge by yihecloud
 6 | # @File           : demo.py
 7 | # @Product        : PyCharm
 8 | # @Docs           : 
 9 | # @Source         : 
10 | 
11 | 
12 | import os
13 | 
14 | from apscheduler.schedulers.blocking import BlockingScheduler
15 | 
if __name__ == '__main__':
    scheduler = BlockingScheduler()
    # The job target is given as a textual reference ('module:attribute').
    scheduler.add_job('sys:stdout.write', 'interval', seconds=3, args=['tick ...\n'])

    exit_key = 'Break' if os.name == 'nt' else 'C'
    print('Press Ctrl+{0} to exit'.format(exit_key))

    # start() blocks; leaving through Ctrl+C / SystemExit is the normal exit.
    try:
        scheduler.start()
    except (KeyboardInterrupt, SystemExit):
        pass
25 | 


--------------------------------------------------------------------------------
/scheduler_task/study_apscheduler/examples/executors/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # @Time           : 17-8-13 上午11:07
4 | # @Author         : Tom.Lee
5 | # @CopyRight      : 2016-2017 OpenBridge by yihecloud
6 | # @File           : __init__.py.py
7 | # @Product        : PyCharm
8 | 


--------------------------------------------------------------------------------
/scheduler_task/study_apscheduler/examples/executors/configure.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # @Time           : 17-8-9 上午9:56
 4 | # @Author         : Tom.Lee
 5 | # @CopyRight      : 2016-2017
 6 | # @File           : job_configure.py
 7 | # @Product        : PyCharm
 8 | 
 9 | 
10 | from apscheduler.executors.pool import ThreadPoolExecutor, ProcessPoolExecutor
11 | from apscheduler.jobstores.sqlalchemy import SQLAlchemyJobStore
12 | from apscheduler.schedulers.background import BackgroundScheduler
13 | from pytz import utc
14 | 
# Jobs are persisted in MySQL through SQLAlchemy.
job_stores = dict(
    default=SQLAlchemyJobStore(
        url='mysql+mysqldb://root:root@localhost:3306/djangoapp?charset=utf8'),
)

# A thread pool as the default executor plus a named process pool.
executors = dict(
    default=ThreadPoolExecutor(20),
    processpool=ProcessPoolExecutor(5),
)

job_defaults = dict(coalesce=False, max_instances=3)

# UTC as the scheduler's timezone
scheduler = BackgroundScheduler(
    timezone=utc,
    daemon=False,
    job_defaults=job_defaults,
    executors=executors,
    jobstores=job_stores,
)
40 | 
41 | 
def current_time():
    """Return the current local time formatted as ``YYYY-MM-DD HH:MM:SS``."""
    from datetime import datetime

    now = datetime.now()
    return now.strftime("%Y-%m-%d %H:%M:%S")
45 | 
46 | 
def job1():
    """Print a line saying that job1 is running, with the current time."""
    message = 'job1 is running, Now is %s' % current_time()
    print(message)
49 | 
50 | 
def job2():
    """Print a line saying that job2 is running, with the current time."""
    message = 'job2 is running, Now is %s' % current_time()
    print(message)
53 | 
54 | 
# job1 runs every 5 seconds through an interval trigger, job2 every 5 seconds
# through a cron trigger.  replace_existing=True keeps an already stored job
# with the same id from being added twice, which would make start-up fail.
for job, trigger, options in (
    (job1, 'interval', {'seconds': 5, 'id': 'job1'}),
    (job2, 'cron', {'second': '*/5', 'id': 'job2'}),
):
    scheduler.add_job(job, trigger, replace_existing=True, **options)

scheduler.start()
print(scheduler.get_jobs())
61 | 


--------------------------------------------------------------------------------
/scheduler_task/study_apscheduler/examples/executors/process_pool.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # @Time           : 17-8-13 上午10:56
 4 | # @Author         : Tom.Lee
 5 | # @CopyRight      : 2016-2017
 6 | # @File           : process_pool.py
 7 | # @Product        : PyCharm
 8 | 
 9 | from datetime import datetime
10 | import os
11 | 
12 | from apscheduler.schedulers.blocking import BlockingScheduler
13 | 
14 | 
def tick():
    """Print the current time prefixed with ``Tick!``."""
    now = datetime.now()
    print('Tick! The time is: %s' % now)
17 | 
18 | 
if __name__ == '__main__':
    scheduler = BlockingScheduler()
    # Registered without an alias, so add_executor's default alias applies.
    scheduler.add_executor('processpool')
    scheduler.add_job(tick, 'interval', seconds=3)

    exit_key = 'Break' if os.name == 'nt' else 'C'
    print('Press Ctrl+{0} to exit'.format(exit_key))

    # start() blocks; leaving through Ctrl+C / SystemExit is the normal exit.
    try:
        scheduler.start()
    except (KeyboardInterrupt, SystemExit):
        pass


--------------------------------------------------------------------------------
/scheduler_task/study_apscheduler/examples/executors/simple.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # @Time           : 17-7-22 上午10:29
 4 | # @Author         : Tom.Lee
 5 | # @CopyRight      : 2016-2017
 6 | # @File           : t.py
 7 | # @Product        : PyCharm
 8 | 
 9 | 
10 | import datetime
11 | 
12 | from apscheduler.executors.pool import ThreadPoolExecutor, ProcessPoolExecutor
13 | from apscheduler.schedulers.blocking import BlockingScheduler
14 | 
# Scheduler shared by every job registration below.
scheduler = BlockingScheduler()


def current_time():
    """Return the current local time formatted as ``YYYY-MM-DD HH:MM:SS``."""
    now = datetime.datetime.now()
    return now.strftime("%Y-%m-%d %H:%M:%S")


# ################### jobs registered with add_job #################

def job1():
    """Print a line saying that job1 is running, with the current time."""
    message = 'job1 is running, Now is %s' % current_time()
    print(message)


def job2():
    """Print a line saying that job2 is running, with the current time."""
    message = 'job2 is running, Now is %s' % current_time()
    print(message)


# Run job1 every 5 seconds (interval trigger)
scheduler.add_job(job1, 'interval', seconds=5, id='job1')
# Run job2 every 5 seconds (cron trigger)
scheduler.add_job(job2, 'cron', second='*/5', id='job2')


# ################### jobs registered with the decorator #################

# Run job3 every 5 seconds (interval trigger)
@scheduler.scheduled_job('interval', seconds=5, id='job3')
def job3():
    """Print a line saying that job3 is running, with the current time."""
    message = 'job3 is running, Now is %s' % current_time()
    print(message)


# Run job4 every 5 seconds (cron trigger)
@scheduler.scheduled_job('cron', second='*/5', id='job4')
def job4():
    """Print a line saying that job4 is running, with the current time."""
    message = 'job4 is running, Now is %s' % current_time()
    print(message)


# Executors and job defaults are applied before the scheduler starts.
executors = {
    'processpool': ProcessPoolExecutor(5),
    'default': ThreadPoolExecutor(20),
}
job_defaults = {
    'coalesce': False,
    'max_instances': 5,
}
scheduler.configure(executors=executors, job_defaults=job_defaults)
scheduler.start()
62 | 


--------------------------------------------------------------------------------
/scheduler_task/study_apscheduler/examples/jobstores/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # @Time           : 17-8-13 上午11:09
4 | # @Author         : Tom.Lee
5 | # @CopyRight      : 2016-2017 OpenBridge by yihecloud
6 | # @File           : __init__.py.py
7 | # @Product        : PyCharm
8 | 


--------------------------------------------------------------------------------
/scheduler_task/study_apscheduler/examples/jobstores/job_store.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # @Time           : 17-8-13 上午11:14
 4 | # @Author         : Tom.Lee
 5 | # @CopyRight      : 2016-2017
 6 | # @File           : mongodb.py
 7 | # @Product        : PyCharm
 8 | # @Docs           : 
 9 | 
10 | 
11 | from datetime import datetime, timedelta
12 | import sys
13 | import os
14 | 
15 | from apscheduler.schedulers.blocking import BlockingScheduler
16 | 
17 | 
def alarm(time):
    """Print a message stating when this alarm was scheduled.

    :param time: value interpolated into the message
    """
    message = 'Alarm! This alarm was scheduled at %s.' % time
    print(message)
20 | 
21 | 
if __name__ == '__main__':
    scheduler = BlockingScheduler()
    # Keep the jobs in MongoDB, in the 'example_jobs' collection.
    scheduler.add_jobstore('mongodb', collection='example_jobs')

    # "--clear" as the first argument removes every stored job first.
    if sys.argv[1:2] == ['--clear']:
        scheduler.remove_all_jobs()

    # One-shot job that fires 10 seconds from now and is told when it was scheduled.
    alarm_time = datetime.now() + timedelta(seconds=10)
    scheduler.add_job(alarm, 'date', run_date=alarm_time, args=[datetime.now()])
    print('To clear the alarms, run this example with the --clear argument.')

    exit_key = 'Break' if os.name == 'nt' else 'C'
    print('Press Ctrl+{0} to exit'.format(exit_key))

    try:
        scheduler.start()
    except (KeyboardInterrupt, SystemExit):
        pass
39 | 


--------------------------------------------------------------------------------
/scheduler_task/study_apscheduler/examples/jobstores/log.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # @Time           : 17-8-13 上午11:29
 4 | # @Author         : Tom.Lee
 5 | # @CopyRight      : 2016-2017 OpenBridge by yihecloud
 6 | # @File           : log.py
 7 | # @Product        : PyCharm
 8 | # @Docs           : 
 9 | # @Source         : 
10 | import logging
11 | 
# Log record format: level, logger name, message
fmt = logging.Formatter('%(levelname)s:%(name)s:%(message)s')

# Stream handler that renders records with the format above.
handler = logging.StreamHandler()
handler.setFormatter(fmt)

# Logger named after apscheduler's default executor.
log = logging.getLogger('apscheduler.executors.default')
log.setLevel(logging.INFO)  # DEBUG
log.addHandler(handler)
19 | 


--------------------------------------------------------------------------------
/scheduler_task/study_apscheduler/examples/schedules/__init__.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # @Time           : 17-8-13 上午11:39
 4 | # @Author         : Tom.Lee
 5 | # @CopyRight      : 2016-2017 OpenBridge by yihecloud
 6 | # @File           : __init__.py.py
 7 | # @Product        : PyCharm
 8 | # @Docs           : 
 9 | # @Source         : 
10 | 
11 | 
12 | 


--------------------------------------------------------------------------------
/scheduler_task/study_apscheduler/examples/schedules/schdule.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | # @Time           : 17-8-13 上午11:40
  4 | # @Author         : Tom.Lee
  5 | # @CopyRight      : 2016-2017 OpenBridge by yihecloud
  6 | # @File           : asyncio.py
  7 | # @Product        : PyCharm
  8 | # @Docs           : 
  9 | 
 10 | import os
 11 | import time
 12 | from datetime import datetime
 13 | 
 14 | 
 15 | def asyncio_schedule():
 16 |     """
 17 |     python version >= 3.4.0
 18 |     :return:
 19 |     """
 20 |     from apscheduler.schedulers.asyncio import AsyncIOScheduler
 21 |     try:
 22 |         import asyncio
 23 |     except ImportError:
 24 |         import trollius as asyncio
 25 | 
 26 |     def tick():
 27 |         print('Tick! The time is: %s' % datetime.now())
 28 | 
 29 |     scheduler = AsyncIOScheduler()
 30 |     scheduler.add_job(tick, 'interval', seconds=3)
 31 |     scheduler.start()
 32 |     print('Press Ctrl+{0} to exit'.format('Break' if os.name == 'nt' else 'C'))
 33 | 
 34 |     # Execution will block here until Ctrl+C (Ctrl+Break on Windows) is pressed.
 35 |     try:
 36 |         asyncio.get_event_loop().run_forever()
 37 |     except (KeyboardInterrupt, SystemExit):
 38 |         pass
 39 | 
 40 | 
 41 | def background_schedule():
 42 |     from apscheduler.schedulers.background import BackgroundScheduler
 43 | 
 44 |     def tick():
 45 |         print('Tick! The time is: %s' % datetime.now())
 46 | 
 47 |     scheduler = BackgroundScheduler()
 48 |     scheduler.add_job(tick, 'interval', seconds=3)
 49 |     scheduler.start()
 50 |     print('Press Ctrl+{0} to exit'.format('Break' if os.name == 'nt' else 'C'))
 51 | 
 52 |     try:
 53 |         # This is here to simulate application activity (which keeps the main thread alive).
 54 |         while True:
 55 |             time.sleep(2)
 56 |     except (KeyboardInterrupt, SystemExit):
 57 |         # Not strictly necessary if daemonic mode is enabled but should be done if possible
 58 |         scheduler.shutdown()
 59 | 
 60 | 
 61 | def blocking_schedule():
 62 |     from apscheduler.schedulers.blocking import BlockingScheduler
 63 | 
 64 |     def tick():
 65 |         print('Tick! The time is: %s' % datetime.now())
 66 | 
 67 |     scheduler = BlockingScheduler()
 68 |     scheduler.add_job(tick, 'interval', seconds=3)
 69 |     print('Press Ctrl+{0} to exit'.format('Break' if os.name == 'nt' else 'C'))
 70 | 
 71 |     try:
 72 |         scheduler.start()
 73 |     except (KeyboardInterrupt, SystemExit):
 74 |         pass
 75 | 
 76 | 
 77 | def gevent_schedule():
 78 |     from apscheduler.schedulers.gevent import GeventScheduler
 79 | 
 80 |     def tick():
 81 |         print('Tick! The time is: %s' % datetime.now())
 82 | 
 83 |     scheduler = GeventScheduler()
 84 |     scheduler.add_job(tick, 'interval', seconds=3)
 85 |     g = scheduler.start()  # g is the greenlet that runs the scheduler loop
 86 |     print('Press Ctrl+{0} to exit'.format('Break' if os.name == 'nt' else 'C'))
 87 | 
 88 |     # Execution will block here until Ctrl+C (Ctrl+Break on Windows) is pressed.
 89 |     try:
 90 |         g.join()
 91 |     except (KeyboardInterrupt, SystemExit):
 92 |         pass
 93 | 
 94 | 
 95 | def qt_schedule():
 96 |     import signal
 97 |     import sys
 98 |     from apscheduler.schedulers.qt import QtScheduler
 99 | 
100 |     try:
101 |         from PyQt5.QtWidgets import QApplication, QLabel
102 |     except ImportError:
103 |         try:
104 |             from PyQt4.QtGui import QApplication, QLabel
105 |         except ImportError:
106 |             from PySide.QtGui import QApplication, QLabel
107 | 
108 |     def tick():
109 |         label.setText('Tick! The time is: %s' % datetime.now())
110 | 
111 |     app = QApplication(sys.argv)
112 | 
113 |     # This enables processing of Ctrl+C keypresses
114 |     signal.signal(signal.SIGINT, lambda *args: QApplication.quit())
115 | 
116 |     label = QLabel('The timer text will appear here in a moment!')
117 |     label.setWindowTitle('QtScheduler example')
118 |     label.setFixedSize(280, 50)
119 |     label.show()
120 | 
121 |     scheduler = QtScheduler()
122 |     scheduler.add_job(tick, 'interval', seconds=3)
123 |     scheduler.start()
124 | 
125 |     # Execution will block here until the user closes the windows or Ctrl+C is pressed.
126 |     app.exec_()
127 | 
128 | 
def tornado_schedule():
    """Run a 3-second interval job on a ``TornadoScheduler`` inside the Tornado IOLoop."""
    from tornado.ioloop import IOLoop
    from apscheduler.schedulers.tornado import TornadoScheduler

    def tick():
        now = datetime.now()
        print('Tick! The time is: %s' % now)

    scheduler = TornadoScheduler()
    scheduler.add_job(tick, 'interval', seconds=3)
    scheduler.start()

    exit_key = 'Break' if os.name == 'nt' else 'C'
    print('Press Ctrl+{0} to exit'.format(exit_key))

    # Execution will block here until Ctrl+C (Ctrl+Break on Windows) is pressed.
    try:
        io_loop = IOLoop.instance()
        io_loop.start()
    except (KeyboardInterrupt, SystemExit):
        pass
146 | 
147 | 
def twisted_schedule():
    """Run a 3-second interval job on a ``TwistedScheduler`` inside the Twisted reactor."""
    from twisted.internet import reactor
    from apscheduler.schedulers.twisted import TwistedScheduler

    def tick():
        now = datetime.now()
        print('Tick! The time is: %s' % now)

    scheduler = TwistedScheduler()
    scheduler.add_job(tick, 'interval', seconds=3)
    scheduler.start()

    exit_key = 'Break' if os.name == 'nt' else 'C'
    print('Press Ctrl+{0} to exit'.format(exit_key))

    # Execution will block here until Ctrl+C (Ctrl+Break on Windows) is pressed.
    try:
        reactor.run()
    except (KeyboardInterrupt, SystemExit):
        pass
165 | 
166 | 
if __name__ == '__main__':
    # Other variants that can be tried instead:
    # gevent_schedule()
    # twisted_schedule()
    tornado_schedule()
    # Reached only after the blocking call above has returned.
    print(123)
173 | 


--------------------------------------------------------------------------------
/scheduler_task/study_apscheduler/tutorials/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # @Time           : 17-8-13 上午10:56
4 | # @Author         : Tom.Lee
5 | # @CopyRight      : 2016-2017 OpenBridge by yihecloud
6 | # @File           : __init__.py.py
7 | # @Product        : PyCharm
8 | 


--------------------------------------------------------------------------------
/set.md:
--------------------------------------------------------------------------------
 1 | # Python set集合
 2 | python的set集合是一组无序排列、可哈希的值，和字典的key是一样的，它初始化时会声明一个空列表，然后将元素加入，
 3 | 所以使用终端打印时，set返回结果是`set([])`
 4 | * 支持：集合关系测试，成员关系测试，可迭代
 5 | * 不支持：索引，元素获取，切片
 6 | * 类型：可变 `set()` , 不可变 `frozenset()`
 7 | 
 8 | # 语法
 9 | ### 声明
10 | 因为set()集合的返回结果是以列表的形式显示的，所以初始化时，如果直接传入字符串，它会把字符串拆分成由单个字符组成的列表，然后逐个加入
11 | * `s=set()`       :声明一个空set集合 >> set([])
12 | * `s=set('abc')`  :声明并赋值set集合 >> set(['a','c','b'])
13 | * `s=set(['abc'])`:声明并赋值set集合 >> set(['abc'])
14 | 
15 | ### 添加
16 | 因为set是无序唯一集合，所以添加重复元素是不能再次加入的
17 | * `set.add(val)` :添加元素到set集合
18 | * `set.update(val)` :添加的对象必须是可以迭代的对象，str,[],{}等，不能是int类型，它会首先把对象进行迭代然后加入到set集合中，
19 | 注意的是，对于{}字典类型，它只会操作keys.
20 | ```python
21 | >>> s=set()
22 | >>> s.add(1)
23 | >>> s
24 | set([1])
25 | >>> s.update('abc')
26 | >>> s
27 | set(['a', 1, 'c', 'b'])
28 | >>> s.update([90,100])
29 | >>> s
30 | set(['a', 1, 'c', 'b', 100, 90])
31 | >>> s.update({'key1':1,'key2':2})
32 | >>> s
33 | set(['a', 1, 'c', 'b', 100, 'key2', 'key1', 90])
34 | >>> s.add(1)
35 | >>> s
36 | set(['a', 1, 'c', 'b', 100, 'key2', 'key1', 90])
37 | ```
38 | ### 删除
39 | `s.remove(val)`使用remove()方法进行元素的删除
40 | 
41 | ### 集合操作
42 | 集合的交集、合集（并集）、差集
43 | ```
44 | 举例：a=set([1,2,3]) ,b=set([3,4,5])
45 | python符号     含义                举例
46 |    -           差集，相对补集       a-b  >>set([1, 2])
47 |    &           交集                a&b  >>set([3])
48 |    |           合集，并集           a|b  >>set([1, 2, 3, 4, 5])
49 |    !=          不等于              a!=b >>True
50 |    ==          等于                a==b >>False
51 |    in          是成员              1 in a >> True
52 |    not in      不是成员            1 not in a >> False
53 | 
54 | ```
55 | 
56 | 
57 | 
58 | 


--------------------------------------------------------------------------------
/skills/README.md:
--------------------------------------------------------------------------------
 1 | # python 中一些常用的技巧
 2 | 
 3 | ##### 转义
 4 | * Python中`"%"`的转义是`"%%"`
 5 | 
 6 | ##### float类型保留小数：
 7 | ```python
 8 | b = float('%0.6f'%0.12345678)
 9 | print b   #0.123457
10 | ```
11 | 
12 | ##### url检测
13 | ```python
14 | url = 'https://www.baidu.com/'
15 | 
16 | print url.rstrip('/')
17 | print url.rstrip('/')+'/home'
18 | ```
19 | 
20 | ##### url 转换函数
21 | ```python
22 | import urlparse
23 | urlparse.urljoin('http://www.aa.com:90/aa.html', '/abc.html')
24 | # http://www.aa.com:90/abc.html
25 | 
26 | 
27 | from __future__ import print_function
28 | 
29 | import urlparse
30 | 
31 | u = urlparse.urlparse('029_t002661t9gt.321002.2.ts?index=29&start=310000&end=320400')
32 | query_params = dict([s.split('=') for s in u.query.split('&')])
33 | print('query_params : {}'.format(query_params))
34 | # query_params : {'index': '29', 'end': '320400', 'start': '310000'}
35 | ```
36 | 
37 | ##### 去空格
38 | ```python
39 | s = ' 1 2 3 4 5 6 '
40 | 
41 | print '|%s|' % s.lstrip(' ')  # 去除左边空格 |1 2 3 4 5 6 |
42 | print '|%s|' % s.rstrip(' ')  # 去除右边空格 | 1 2 3 4 5 6|
43 | print '|%s|' % s.strip(' ')  # 去除两边空格  |1 2 3 4 5 6|
44 | print '|%s|' % s.replace(' ', '')  # 去除所有空格 |123456|
45 | ```
46 | 
47 | ##### 显示有限的接口到外部
48 | ```
49 | 当发布python第三方package时, 并不希望代码中所有的函数或者class可以被外部import,
50 | 在__init__.py中添加__all__属性,
51 | 该list中填写可以import的类或者函数名, 可以起到限制import的作用: 外部使用`from package import *`时只会导入该list中的名称（显式import其他函数或者类仍然可行）
52 | ```
53 | 
54 | ```python
55 | #!/usr/bin/env python
56 | # -*- coding: utf-8 -*-
57 | from base import utils
58 | 
59 | __all__ = ['utils']
60 | ```
61 | 
62 | ---
63 | ## Python博客
64 | * 关于`raw_input()` 和 `input()` ：http://www.cnblogs.com/way_testlife/archive/2011/03/29/1999283.html
65 | 
66 | 
67 | 


--------------------------------------------------------------------------------
/skills/async_call.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: UTF-8 -*-
 3 | 
 4 | import logging
 5 | import Queue
 6 | import threading
 7 | 
 8 | 
 9 | def func_a(a, b):
10 |     return a + b
11 | 
12 | 
def func_b():
    """Do nothing and return ``None``."""
    return None
15 | 
16 | 
def func_c(a, b, c):
    """Return the three arguments as a tuple ``(a, b, c)``."""
    packed = (a, b, c)
    return packed
19 | 
20 | 
# Queue of pending asynchronous tasks
_task_queue = Queue.Queue()


def async_call(function, callback, *args, **kwargs):
    """Queue ``function`` for execution by the task-queue consumer.

    :param function: callable stored in the task
    :param callback: stored next to the function; it is meant to receive the outcome
    :param args: positional arguments stored for ``function``
    :param kwargs: keyword arguments stored for ``function``
    """
    task = dict(function=function, callback=callback, args=args, kwargs=kwargs)
    _task_queue.put(task)
32 | 
33 | 
def _task_queue_consumer():
    """
    Consumer of the asynchronous task queue.

    Loops forever: takes one task dict from ``_task_queue``, calls its
    ``function`` with the stored ``args`` / ``kwargs`` and hands the return
    value to ``callback``.  When the function raises, the exception is logged
    and passed to the callback instead of a result.

    Changes from the previous version:
      * the function is executed even when the task has no callback;
      * a callback that raises is logged and not invoked a second time;
      * ``task_done()`` is called for every task taken from the queue, so a
        malformed task can no longer make ``_task_queue.join()`` wait forever.
    """
    print('_task_queue_consumer')
    while True:
        task = _task_queue.get()
        try:
            function = task.get('function')
            callback = task.get('callback')
            args = task.get('args') or ()
            kwargs = task.get('kwargs') or {}
            try:
                outcome = function(*args, **kwargs)
            except Exception as ex:
                # A failing task is reported to the callback as the exception object.
                logging.error(ex)
                outcome = ex
            if callback:
                callback(outcome)
        except Exception as ex:
            # Malformed task or failing callback: log it and keep consuming.
            logging.error(ex)
        finally:
            _task_queue.task_done()
58 | 
59 | 
def handle_result(result):
    """Demo callback: print the value handed over by the consumer loop.

    ``result`` is whatever the consumer passes to the callback, i.e. the task
    function's return value or the exception caught while running it.
    """
    print result
    # The file uses Python 2 print statements, so the parentheses here build a
    # tuple and the line prints the repr of ``(type, value)``.
    print(type(result), result)
63 | 
64 | 
if __name__ == '__main__':
    # Run the consumer loop in a daemon thread so that its endless loop does
    # not keep the interpreter alive once the main thread is finished.
    t = threading.Thread(target=_task_queue_consumer)
    t.daemon = True
    t.start()

    async_call(func_a, handle_result, 1, 2)
    async_call(func_b, handle_result)
    async_call(func_c, handle_result, 1, 2, 3)
    # func_c accepts three arguments, so this call fails inside the consumer
    # and handle_result receives the exception instead of a result.
    async_call(func_c, handle_result, 1, 2, 3, 4)

    # Block until task_done() has been called for every queued task.
    _task_queue.join()
76 | 


--------------------------------------------------------------------------------
/skills/download_music.py:
--------------------------------------------------------------------------------
 1 | # -*- coding=utf-8 -*-
 2 | 
 3 | import requests
 4 | import json
 5 | import urllib
 6 | import urllib2
 7 | 
 8 | # 查看外链生成器的　url 方法
 9 | data = requests.post(
10 |     url='http://music.163.com/weapi/song/enhance/player/url',
11 |     data={
12 |         'params': 'iPslhRDpIz9eXPL6tuauyLF7NSU7yIetfu0vlx7lzfhlZGm21DICXpBCTmAqdiznd6LMnR6bGegIXGWyjNcHaOpjUC4E9ZLNt42hmQnA6QnYwitYsvs6CrKuXFp8pCJb',
13 |         'encSecKey': '47911c978b596e8c832e76ae96c0660ef6380d7f9e71c56e3ce7d90cf6978b385a6c5cba169cdf74d39cecae564cdaedfbc28e65cef01fbaeb3e0d27c228d6b0a63151ecb2d16a920eb37608d173c5824aa689dbfdb4fce2877df3702eb70cff009a20b84f94ca581b09f0c4840d51881af7702cf07a26e8a16f647739006ff0'
14 |     },
15 | )
16 | 
17 | # print json.loads(data.content)
18 | if data.content:
19 |     url = json.loads(data.content).get('data')[0].get('url')
20 |     print url
21 |     # download 1
22 |     # urllib.urlretrieve(url, url.split('/')[-1])
23 | 
24 |     # download 2
25 |     # r = requests.get(url)
26 |     # with open(url.split('/')[-1], "wb") as code:
27 |     #     code.write(r.content)
28 | 
29 |     # download 3
30 |     f = urllib2.urlopen(url)
31 |     with open(url.split('/')[-1], "wb") as code:
32 |         code.write(f.read())
33 | 


--------------------------------------------------------------------------------
/skills/httpserver.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # @Time           : 17-4-24 下午2:22
 4 | # @Author         : Tom.Lee
 5 | # @Description    : 
 6 | # @File           : httpserver.py
 7 | # @Product        : PyCharm
 8 | 
 9 | """
10 | python 服务器
11 | """
12 | import socket
13 | import select
14 | import sys
15 | from wsgiref.simple_server import make_server
16 | 
17 | """
18 | 用标准库的wsgiref单独起一个服务器监听端口
19 | """
20 | 
21 | 
def simple_app(environ, start_response):
    """Minimal WSGI application: answer every request with a plain-text greeting."""
    headers = [('Content-type', 'text/plain')]
    start_response('200 OK', headers)
    body = ['Hello world!\n']
    return body
28 | 
29 | 
30 | # httpd = make_server('', 10001, simple_app)
31 | # httpd.serve_forever()
32 | 
33 | """
34 | 代理服务器
35 | """
36 | 
37 | to_addr = ('127.0.0.1', 10001)  # 转发的地址
38 | 
39 | 
class Proxy:
    """Single-threaded TCP relay built on ``select``.

    Every accepted client gets its own connection to the module-level
    ``to_addr``; bytes read from one side are written to the other.

    Attributes:
        proxy:  the listening socket.
        inputs: sockets watched for readability (listener, clients, forwards).
        route:  maps each connected socket to its peer, in both directions.
        sock:   the socket currently being handled by ``serve_forever``.
    """

    def __init__(self, addr):
        """Bind the listening socket to ``addr`` (a ``(host, port)`` tuple)."""
        self.proxy = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.proxy.bind(addr)
        self.proxy.listen(10)
        self.inputs = [self.proxy]
        self.route = {}
        self.sock = None

    def serve_forever(self):
        """Accept clients and relay data until the process is interrupted."""
        print('proxy listen...')
        while 1:
            readable, _, _ = select.select(self.inputs, [], [])
            for sock in readable:
                self.sock = sock
                if sock is self.proxy:
                    self.on_join()
                    continue
                if sock not in self.route:
                    # Its peer closed earlier in this round and on_quit()
                    # already tore the pair down; nothing left to read.
                    continue
                try:
                    data = sock.recv(8096)
                    if data:
                        # sendall: send() may write only part of the buffer.
                        self.route[sock].sendall(data)
                except socket.error:
                    # A reset/broken connection is treated like a normal close.
                    data = None
                if not data:
                    self.on_quit()

    def on_join(self):
        """Accept a pending client and pair it with a connection to ``to_addr``."""
        client, addr = self.proxy.accept()
        print('%s connect' % (addr,))
        forward = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            forward.connect(to_addr)
        except socket.error:
            # Target unreachable: drop this client but keep the proxy serving.
            forward.close()
            client.close()
            return
        self.inputs += [client, forward]
        self.route[client] = forward
        self.route[forward] = client

    def on_quit(self):
        """Close ``self.sock`` and its peer and forget both of them.

        Safe to call for a socket that was already removed.
        """
        peer = self.route.get(self.sock)
        for s in self.sock, peer:
            if s is None:
                continue
            if s in self.inputs:
                self.inputs.remove(s)
            self.route.pop(s, None)
            s.close()
76 | 
77 | 
if __name__ == '__main__':
    try:
        # Start the relay on port 12345; connections are forwarded to to_addr.
        Proxy(('', 12345)).serve_forever()  # address the proxy server listens on
    except KeyboardInterrupt:
        # Ctrl-C stops the proxy; the process exits with status 1.
        sys.exit(1)
83 | 


--------------------------------------------------------------------------------
/standard_library/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # @Time    : 2017/4/29 14:43
4 | # @Author  : Aries
5 | # @Site    : 
6 | # @File    : __init__.py.py
7 | # @Software: PyCharm
8 | 
9 | 


--------------------------------------------------------------------------------
/standard_library/email/message_html.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # @Time           : 17-8-29 下午2:33
 4 | # @Author         : Tom.Lee
 5 | # @CopyRight      : 2016-2017 OpenBridge by yihecloud
 6 | # @File           : html_message.py
 7 | # @Product        : PyCharm
 8 | # @Docs           : 
 9 | # @Source         : 
10 | 
11 | import smtplib
12 | 
13 | from email.mime.multipart import MIMEMultipart
14 | from email.mime.text import MIMEText
15 | 
# Configuration: fill these in before running the script.
FROM_ADDRESS = ''  # sender's address
EMAIL_PWD = ''     # password
TO_ADDRESSES = ''  # recipient's email address
SMTP_HOST = ''
SMTP_PORT = 25

# Create message container - the correct MIME type is multipart/alternative.
msg = MIMEMultipart('alternative')
msg['Subject'] = "Link"
msg['From'] = FROM_ADDRESS
msg['To'] = TO_ADDRESSES

# Create the body of the message (a plain-text and an HTML version).
text = "Hi!\nHow are you?\nHere is the link you wanted:\nhttps://www.python.org"
html = """\
<html>
  <head></head>
  <body>
    <p>Hi!<br>
       How are you?<br>
       Here is the <a href="https://www.python.org">link</a> you wanted.
    </p>
  </body>
</html>
"""

# Record the MIME types of both parts - text/plain and text/html.
part1 = MIMEText(text, 'plain')
part2 = MIMEText(html, 'html')

# Attach parts into message container.
# According to RFC 2046, the last part of a multipart message, in this case
# the HTML message, is best and preferred.
msg.attach(part1)
msg.attach(part2)

# Send the message via the configured SMTP server.
s = smtplib.SMTP()
try:
    s.connect(host=SMTP_HOST, port=SMTP_PORT)
    s.login(FROM_ADDRESS, EMAIL_PWD)

    # sendmail function takes 3 arguments: sender's address, recipient's address
    # and message to send - here it is sent as one string.
    s.sendmail(FROM_ADDRESS, [TO_ADDRESSES], msg.as_string())
    s.quit()
finally:
    # Release the socket even when connect/login/sendmail raised; close() is
    # harmless after a successful quit().
    s.close()
62 | 


--------------------------------------------------------------------------------
/standard_library/email/message_text.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # @Time           : 17-8-28 下午4:31
 4 | # @Author         : Tom.Lee
 5 | # @File           : text_message.py
 6 | # @Product        : PyCharm
 7 | # @Source         :
 8 | 
 9 | """创建并发送简单文本消息"""
10 | import smtplib
11 | from email.mime.text import MIMEText
12 | 
# Configuration: fill these in before running the script.
me = ''
you = ''
smtp_host = ''
smtp_port = 25
passwd = ''
textfile = 'textfile'

# Open a plain text file for reading.  For this example, assume that
# the text file contains only ASCII characters.
with open(textfile, 'rb') as fp:
    # Create a text/plain message.  The subtype must be 'plain'; 'text' would
    # produce the non-existent content type text/text.
    msg = MIMEText(fp.read(), 'plain', 'utf-8')

# me == the sender's email address
# you == the recipient's email address
msg['Subject'] = 'The contents of %s' % textfile
msg['From'] = me
msg['To'] = you

# Send the message via our own SMTP server, but don't include the
# envelope header.
s = smtplib.SMTP()
try:
    s.connect(host=smtp_host, port=smtp_port)
    s.set_debuglevel(1)
    s.login(me, passwd)
    s.sendmail(me, [you], msg.as_string())
    s.quit()
finally:
    # Release the socket even when a step above raised; close() is harmless
    # after a successful quit().
    s.close()
42 | 


--------------------------------------------------------------------------------
/standard_library/email/shell_mime.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | # @Time           : 17-8-28 下午4:09
  4 | # @Author         : Tom.Lee
  5 | # @CopyRight      : 2016-2017 OpenBridge by yihecloud
  6 | # @File           : mime.py
  7 | # @Product        : PyCharm
  8 | # @Docs           : 
  9 | # @Source         : 
 10 | 
 11 | 
 12 | """Send the contents of a directory as a MIME message."""
 13 | 
 14 | import os
 15 | import smtplib
 16 | import sys
 17 | 
 18 | # For guessing MIME type based on file name extension
 19 | import mimetypes
 20 | 
 21 | from optparse import OptionParser
 22 | 
 23 | from email import encoders
 24 | from email.mime.audio import MIMEAudio
 25 | from email.mime.base import MIMEBase
 26 | from email.mime.image import MIMEImage
 27 | from email.mime.multipart import MIMEMultipart
 28 | from email.mime.text import MIMEText
 29 | 
 30 | COMMASPACE = ', '
 31 | 
 32 | 
 33 | def main():
 34 |     parser = OptionParser(usage="""\
 35 | Send the contents of a directory as a MIME message.
 36 | 
 37 | Usage: %prog [options]
 38 | 
 39 | Unless the -o option is given, the email is sent by forwarding to your local
 40 | SMTP server, which then does the normal delivery process.  Your local machine
 41 | must be running an SMTP server.
 42 | """)
 43 |     parser.add_option('-d', '--directory',
 44 |                       type='string', action='store',
 45 |                       help="""Mail the contents of the specified directory,
 46 |                       otherwise use the current directory.  Only the regular
 47 |                       files in the directory are sent, and we don't recurse to
 48 |                       subdirectories.""")
 49 |     parser.add_option('-o', '--output',
 50 |                       type='string', action='store', metavar='FILE',
 51 |                       help="""Print the composed message to FILE instead of
 52 |                       sending the message to the SMTP server.""")
 53 |     parser.add_option('-s', '--sender',
 54 |                       type='string', action='store', metavar='SENDER',
 55 |                       help='The value of the From: header (required)')
 56 |     parser.add_option('-r', '--recipient',
 57 |                       type='string', action='append', metavar='RECIPIENT',
 58 |                       default=[], dest='recipients',
 59 |                       help='A To: header value (at least one required)')
 60 |     opts, args = parser.parse_args()
 61 |     if not opts.sender or not opts.recipients:
 62 |         parser.print_help()
 63 |         sys.exit(1)
 64 |     directory = opts.directory
 65 |     if not directory:
 66 |         directory = '.'
 67 |     # Create the enclosing (outer) message
 68 |     outer = MIMEMultipart()
 69 |     outer['Subject'] = 'Contents of directory %s' % os.path.abspath(directory)
 70 |     outer['To'] = COMMASPACE.join(opts.recipients)
 71 |     outer['From'] = opts.sender
 72 |     outer.preamble = 'You will not see this in a MIME-aware mail reader.\n'
 73 | 
 74 |     for filename in os.listdir(directory):
 75 |         path = os.path.join(directory, filename)
 76 |         if not os.path.isfile(path):
 77 |             continue
 78 |         # Guess the content type based on the file's extension.  Encoding
 79 |         # will be ignored, although we should check for simple things like
 80 |         # gzip'd or compressed files.
 81 |         ctype, encoding = mimetypes.guess_type(path)
 82 |         if ctype is None or encoding is not None:
 83 |             # No guess could be made, or the file is encoded (compressed), so
 84 |             # use a generic bag-of-bits type.
 85 |             ctype = 'application/octet-stream'
 86 |         maintype, subtype = ctype.split('/', 1)
 87 |         if maintype == 'text':
 88 |             fp = open(path)
 89 |             # Note: we should handle calculating the charset
 90 |             msg = MIMEText(fp.read(), _subtype=subtype)
 91 |             fp.close()
 92 |         elif maintype == 'image':
 93 |             fp = open(path, 'rb')
 94 |             msg = MIMEImage(fp.read(), _subtype=subtype)
 95 |             fp.close()
 96 |         elif maintype == 'audio':
 97 |             fp = open(path, 'rb')
 98 |             msg = MIMEAudio(fp.read(), _subtype=subtype)
 99 |             fp.close()
100 |         else:
101 |             fp = open(path, 'rb')
102 |             msg = MIMEBase(maintype, subtype)
103 |             msg.set_payload(fp.read())
104 |             fp.close()
105 |             # Encode the payload using Base64
106 |             encoders.encode_base64(msg)
107 |         # Set the filename parameter
108 |         msg.add_header('Content-Disposition', 'attachment', filename=filename)
109 |         outer.attach(msg)
110 |     # Now send or store the message
111 |     composed = outer.as_string()
112 |     if opts.output:
113 |         fp = open(opts.output, 'w')
114 |         fp.write(composed)
115 |         fp.close()
116 |     else:
117 |         s = smtplib.SMTP('localhost')
118 |         s.sendmail(opts.sender, opts.recipients, composed)
119 |         s.quit()
120 | 
121 | 
122 | if __name__ == '__main__':
123 |     main()
124 | 


--------------------------------------------------------------------------------
/standard_library/email/textfile:
--------------------------------------------------------------------------------
1 | hello world!
2 | use python smtplib.


--------------------------------------------------------------------------------
/standard_library/process/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # @Time    : 17-5-7 下午11:15
4 | # @Author  : tom.lee
5 | # @Site    : 
6 | # @File    : __init__.py.py
7 | # @Software: PyCharm


--------------------------------------------------------------------------------
/standard_library/process/process_pool.py:
--------------------------------------------------------------------------------
 1 | # coding:utf-8
 2 | import random
 3 | from time import sleep
 4 | import sys
 5 | import multiprocessing
 6 | import os
 7 | 
 8 | lock = multiprocessing.Lock()  # 一个锁
 9 | 
10 | 
def a(x):
    """Pool worker: announce start, sleep ``x`` seconds, announce the end.

    The module-level lock serialises the two print calls so that messages
    from different worker processes do not interleave.  ``with`` guarantees
    the lock is released even if printing raises.
    """
    with lock:
        print('开始进程： %s 模拟进程时间: %s' % (os.getpid(), x))
    sleep(x)  # simulate the actual work
    with lock:
        print('结束进程： %s 预测下一个进程启动会使用该进程号' % os.getpid())
19 | 
20 | 
if __name__ == '__main__':
    # The guard keeps worker processes that re-import this module (spawn start
    # method) from creating pools of their own.
    durations = [random.randint(1, 10) for _ in range(10)]
    pool = multiprocessing.Pool(processes=3)  # at most 3 worker processes
    try:
        pool.map(a, durations)
    finally:
        # Shut the workers down cleanly instead of leaving them to the GC.
        pool.close()
        pool.join()
26 | 


--------------------------------------------------------------------------------
/standard_library/process/simple_core.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # @Time    : 17-5-7 下午11:16
 4 | # @Author  : tom.lee
 5 | # @Site    : 
 6 | # @File    : simple_core.py
 7 | # @Software: PyCharm
 8 | 
 9 | 
10 | from multiprocessing import Process
11 | import time
12 | import os
13 | 
14 | 
def worker_1(interval):
    """Print the parent/own pid, sleep ``interval`` seconds, print the elapsed time."""
    parent_pid, own_pid = os.getppid(), os.getpid()
    print("worker_1,父进程(%s),当前进程(%s)" % (parent_pid, own_pid))
    began = time.time()
    time.sleep(interval)
    elapsed = time.time() - began
    print("worker_1,执行时间为'%0.2f'秒" % elapsed)
21 | 
22 | 
def worker_2(interval):
    """Print the parent/own pid, sleep ``interval`` seconds, print the elapsed time."""
    parent_pid, own_pid = os.getppid(), os.getpid()
    print("worker_2,父进程(%s),当前进程(%s)" % (parent_pid, own_pid))
    began = time.time()
    time.sleep(interval)
    elapsed = time.time() - began
    print("worker_2,执行时间为'%0.2f'秒" % elapsed)
29 | 
30 | 
if __name__ == "__main__":
    print("进程ID：%s" % os.getpid())
    # Without a name argument the process object is named Process-N, where N
    # is an increasing integer.
    p1 = Process(target=worker_1, args=(20,))
    p1.start()
    # p1.join() # would block here and prevent the two workers running concurrently
    p2 = Process(target=worker_2, name="dongGe", args=(10,))

    p2.start()
    print("p2.is_alive=%s" % p2.is_alive())
    print("p1.name=%s" % p1.name)
    print("p1.pid=%s" % p1.pid)
    print("p2.name=%s" % p2.name)
    print("p2.pid=%s" % p2.pid)
    # p1.join()  # left disabled by the author, who noted it made no difference
    print("p1.is_alive=%s" % p1.is_alive())


--------------------------------------------------------------------------------
/standard_library/study_argparse.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # @Time           : 17-7-5 下午2:14
 4 | # @Author         : Tom.Lee
 5 | # @Description    : 
 6 | # @File           : study_argparse.py
 7 | # @Product        : PyCharm
 8 | 
 9 | import argparse
10 | 
if __name__ == '__main__':
    # Demo command line: one required option and two options with defaults.
    parser = argparse.ArgumentParser(description='test argparse')

    parser.add_argument(
        '--user', dest='USER', type=str, required=True, help='User Name')
    parser.add_argument(
        '-H', '--host', dest='HOST', type=str, default='localhost',
        help='Server Ip Address')
    parser.add_argument(
        '-P', '--port', dest='PORT', type=int, default=3306,
        help='Server Connection Port')

    args = parser.parse_args()
    print(args)
    # 'no' is not a defined option, so the getattr default (None) is printed;
    # 'PORT' prints the parsed or default port.
    for attr in ('no', 'PORT'):
        print(getattr(args, attr, None))
28 | 


--------------------------------------------------------------------------------
/standard_library/study_color_print.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # @Time           : 17-7-21 下午2:50
 4 | # @Author         : Tom.Lee
 5 | # @File           : study_color_print.py
 6 | # @Product        : PyCharm
 7 | 
 8 | """
 9 | \033[1;31;40m    <!--1-高亮显示 31-前景色红色  40-背景色黑色-->
10 | \033[0m          <!--采用终端默认设置，即取消颜色设置-->
11 | """
12 | 
# Switch the terminal to bold red on black, print a banner and a message,
# then reset the terminal attributes.
banner = ' '.join(['*' * 25, 'LOG', '*' * 25])
print('\033[1;31;40m ')
print(banner)
print('hello world!')
print('\033[0m')
17 | 


--------------------------------------------------------------------------------
/standard_library/study_file.py:
--------------------------------------------------------------------------------
 1 | #  -*- coding=utf-8 -*-
 2 | 
 3 | """
 4 | 文件操作
 5 | """
 6 | 
 7 | import errno
 8 | import os
 9 | 
10 | import six
11 | 
12 | 
def create_dir(path):
    """Create ``path`` including missing parents; report if it already exists.

    The directory is created first and "already exists" is detected from the
    resulting error, which avoids the race between checking and creating.
    Any other ``OSError`` (permissions, invalid path, ...) is propagated.
    """
    try:
        os.makedirs(path)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise
        print(u'文件夹%s　已经存在' % path)
18 | 
19 | 
def parent_dir(path):
    """Return the part of ``path`` before its last ``/``-separated component.

    One trailing ``/`` is ignored.  Returns ``''`` when there is no parent
    component (``'abc'``, ``'/abc'``) and for the empty string, which used to
    raise ``IndexError``.
    """
    if path.endswith('/'):
        path = path[:-1]
    return path.rpartition('/')[0]
23 | 
24 | 
def del_dir(path):
    """Recursively delete the directory ``path``; do nothing if it is not a directory.

    Uses ``shutil.rmtree`` so that symbolic links inside the tree are removed
    as links (the previous hand-written walk failed on links to directories).
    If ``path`` itself is a symbolic link, ``rmtree`` raises ``OSError``
    without touching the link target.
    """
    import shutil  # local import: the module does not import shutil at the top

    if not os.path.isdir(path):
        return
    shutil.rmtree(path)
34 | 
35 | 
def create_file(name, mode='r', data=""):
    """Write ``data`` to the file ``name``, creating its parent directory if needed.

    ``mode`` is passed to ``open``.  NOTE: the default ``'r'`` opens the file
    read-only, so the write fails unless the caller passes a writable mode
    such as ``'w'``; the default is kept for backward compatibility.

    Best effort: any exception is reported on stdout and not propagated.
    """
    try:
        parent_path = parent_dir(name)
        if parent_path and not os.path.exists(parent_path):
            create_dir(parent_path)
        with open(name, mode) as f:
            f.write(data)
    except Exception as e:
        print(u'%s 创建失败\n异常：%s' % (name, e))
45 | 
46 | 
def remove_file(file_path):
    """Delete ``file_path``; any ``OSError`` (e.g. missing file) is ignored."""
    try:
        os.remove(file_path)
    except OSError:
        # Best effort: removal failures are silently ignored.
        return None
    return None
52 | 
53 | 
def get_file_size(file_obj):
    """Return the size of ``file_obj`` in bytes, or ``None`` if it cannot be measured.

    The size is found by seeking to the end and reading the offset; the
    original offset is restored afterwards.  ``None`` is returned when the
    object has no ``seek``/``tell``, is not seekable (checked on Python 3),
    or seeking fails with ``ESPIPE``.  Other ``IOError``s propagate.
    """
    if not (hasattr(file_obj, 'seek') and hasattr(file_obj, 'tell')):
        return None
    if not six.PY2 and not (six.PY3 and file_obj.seekable()):
        return None

    try:
        saved_offset = file_obj.tell()
        file_obj.seek(0, os.SEEK_END)
        total = file_obj.tell()
        file_obj.seek(saved_offset)
    except IOError as err:
        if err.errno != errno.ESPIPE:
            raise
        return None
    return total
69 | 
70 | 
if __name__ == '__main__':
    # create_file('/home/aric/pythontest/bb/bbb/abc.txt', 'w', 'hello world')

    # NOTE(review): running this module as a script recursively deletes this
    # hard-coded directory.
    del_dir('/home/liyuanjun/keys')
75 | 


--------------------------------------------------------------------------------
/standard_library/study_filter.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # @Time    : 17-7-22 上午1:18
 4 | # @Author  : tom.lee
 5 | # @Site    : 
 6 | # @File    : study_filter.py
 7 | # @Software: PyCharm
 8 | 
 9 | """
10 | 按照某种规则过滤掉一些元素
11 | 
12 | 接收一个 boolean返回值的函数，可用时lambda,可以是自定义的函数，
13 | 迭代传入的可迭代对象的每个元素进行过滤
14 | """
15 | 
lst = [1, 2, 3, 4, 5, 6]
# The predicate is True for odd numbers; even numbers are filtered out.
# list() materialises the result so the output is the same whether filter()
# returns a list (Python 2) or a lazy iterator (Python 3).
odd_numbers = list(filter(lambda x: x % 2 != 0, lst))
print(odd_numbers)
# Output: [1, 3, 5]
20 | 
21 | 


--------------------------------------------------------------------------------
/standard_library/study_httplib.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # @Time           : 17-7-21 下午2:45
 4 | # @Author         : Tom.Lee
 5 | # @File           : study_httplib.py
 6 | # @Product        : PyCharm
 7 | import httplib
 8 | import urllib
 9 | 
10 | 
def request_get(host, port, url, params=None, headers=None, timeout=5):
    """Send an HTTP GET request and return ``(status, body)``.

    ``params`` (a dict or sequence of pairs) is url-encoded and appended to
    ``url`` as the query string, inserting ``?`` or ``&`` as required; a url
    that already ends in ``?`` or ``&`` is extended as-is.

    Best effort: on any error the exception is printed and the values
    gathered so far are returned (``(None, None)`` if nothing was received).
    """
    status, http_client, data = None, None, None
    try:
        query = urllib.urlencode(params or {})
        if query:
            if url.endswith(('?', '&')):
                separator = ''
            elif '?' in url:
                separator = '&'
            else:
                separator = '?'
            url = url + separator + query
        http_client = httplib.HTTPConnection(host, port, timeout=timeout)
        http_client.request('GET', url, headers=headers or {})
        response = http_client.getresponse()
        status = response.status
        data = response.read()
    except Exception as e:
        print(e)
    finally:
        if http_client:
            http_client.close()
    # Returned outside ``finally`` so KeyboardInterrupt/SystemExit are not swallowed.
    return status, data
26 | 
27 | 
def request_post(host, port, url, body=None, headers=None, timeout=5):
    """Send an HTTP POST request and return ``(status, body)``.

    ``headers`` defaults to no extra headers; previously the ``None`` default
    was handed to ``request()`` unchanged, which made every call without
    headers fail.

    Best effort: on any error a message is printed and the values gathered so
    far are returned (``(None, None)`` if nothing was received).
    """
    status, http_client, data = None, None, None
    try:
        http_client = httplib.HTTPConnection(host, port, timeout=timeout)
        http_client.request('POST', url, body, headers or {})
        response = http_client.getresponse()
        status = response.status
        data = response.read()
    except Exception as e:
        print('http post error :{0}'.format(e))
    finally:
        if http_client:
            http_client.close()
    # Returned outside ``finally`` so KeyboardInterrupt/SystemExit are not swallowed.
    return status, data
43 | 


--------------------------------------------------------------------------------
/standard_library/study_itertools.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # @Time           : 17-9-8 下午1:50
 4 | # @Author         : Tom.Lee
 5 | # @File           : study_itertools.py
 6 | # @Product        : PyCharm
 7 | # @Source         :
 8 | 
 9 | 
10 | import itertools
11 | 
12 | """
13 | 合并多个词为一个列表:　
14 | >>>itertools.chain(*iterable)
15 | """
# itertools.chain(*iterables): iterate over several iterables as one sequence.
lst = itertools.chain('hello', 'world', '!')
print(type(lst))  # <type 'itertools.chain'>
print(list(lst))  # ['h', 'e', 'l', 'l', 'o', 'w', 'o', 'r', 'l', 'd', '!']

# itertools.combinations(iterable, r): all r-length combinations of the
# elements, without repeating an element.
lst1 = itertools.combinations('abc', 2)
print(list(lst1))  # [('a', 'b'), ('a', 'c'), ('b', 'c')]

# itertools.combinations_with_replacement(iterable, r): r-length
# combinations in which an element may be repeated.
ls2 = itertools.combinations_with_replacement('abc', 2)
print(list(ls2))
33 | 


--------------------------------------------------------------------------------
/standard_library/study_logging.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | # @Time           : 17-7-5 下午1:10
  4 | # @Author         : Tom.Lee
  5 | # @Description    : 
  6 | # @File           : study_logging.py
  7 | # @Product        : PyCharm
  8 | 
  9 | """
 10 | 注意：basicConfig有一个 很大的缺点。
 11 | 调用basicConfig其实是给root logger添加了一个handler，
 12 | 这样当你的程序和别的使用了 logging的第三方模块一起工作时，
 13 | 会影响第三方模块的logger行为。这是由logger的继承特性决定的。
 14 | """
 15 | 
 16 | import logging
 17 | import sys
 18 | 
 19 | FORMAT_STR = "[%(asctime)s] %(name)s:%(levelname)s: %(message)s"
 20 | 
 21 | 
 22 | # logger = logging.getLogger("django")
 23 | # logger.debug(logging.DEBUG)  # 使用django热加载
 24 | 
 25 | 
 26 | def config1():
 27 |     """
 28 |     **********************Config 1**********************
 29 |     """
 30 |     # config 1.
 31 |     # 设置默认的level为DEBUG
 32 |     # 设置log的格式
 33 |     # 注意：basicConfig有一个 很大的缺点。
 34 |     # 调用basicConfig其实是给root logger添加了一个handler，
 35 |     # 这样当你的程序和别的使用了 logging的第三方模块一起工作时，
 36 |     # 会影响第三方模块的logger行为。这是由logger的继承特性决定的。
 37 |     logging.basicConfig(
 38 |         level=logging.DEBUG,
 39 |         format="[%(asctime)s] %(name)s:%(levelname)s: %(message)s"
 40 |     )
 41 | 
 42 |     # 记录log
 43 |     logging.debug('debug')
 44 |     logging.info('info')
 45 |     logging.warn('warn')
 46 |     logging.error('error')
 47 |     logging.critical('critical')
 48 | 
 49 | 
 50 | def config2():
 51 |     """
 52 |     ********************Config 2************************
 53 |     """
 54 |     # # config 2
 55 |     # 使用一个名字为fib的logger
 56 |     logger = logging.getLogger('app_name')
 57 |     # 设置logger的level为DEBUG
 58 |     logger.setLevel(logging.DEBUG)
 59 |     # 创建一个输出日志到控制台的StreamHandler
 60 |     handler = logging.StreamHandler()
 61 |     formatter = logging.Formatter('[%(asctime)s] %(name)s:%(levelname)s: %(message)s')
 62 |     handler.setFormatter(formatter)
 63 |     # 给logger添加上handler
 64 |     logger.addHandler(handler)
 65 | 
 66 |     logger.debug('debug message')
 67 |     logger.info('hello world')
 68 | 
 69 | 
 70 | def config3():
 71 |     """
 72 |     config3 输出到文件
 73 |     """
 74 |     # 获取logger实例，如果参数为空则返回root logger
 75 |     logger = logging.getLogger("AppName")
 76 |     # 指定logger输出格式
 77 |     formatter = logging.Formatter(FORMAT_STR)
 78 |     # 文件日志
 79 |     file_handler = logging.FileHandler("test.log")
 80 |     file_handler.setFormatter(formatter)  # 可以通过setFormatter指定输出格式
 81 |     # 控制台日志
 82 |     console_handler = logging.StreamHandler(sys.stdout)
 83 |     console_handler.formatter = formatter  # 也可以直接给formatter赋值
 84 |     # 为logger添加的日志处理器，可以自定义日志处理器让其输出到其他地方
 85 |     logger.addHandler(file_handler)
 86 |     logger.addHandler(console_handler)
 87 |     # 指定日志的最低输出级别，默认为WARN级别
 88 |     logger.setLevel(logging.INFO)
 89 | 
 90 |     # 输出不同级别的log
 91 |     logger.debug('this is debug info')
 92 |     logger.info('this is information')
 93 |     logger.warn('this is warning message')
 94 |     logger.error('this is error message')
 95 |     logger.fatal('this is fatal message, it is same as logger.critical')
 96 |     logger.critical('this is critical message')
 97 | 
 98 | 
 99 | 
100 | # if __name__ == '__main__':
101 | 
102 | 


--------------------------------------------------------------------------------
/standard_library/study_os.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # @Time           : 17-4-19 上午11:02
 4 | # @Author         : Tom.Lee
 5 | # @Description    : 
 6 | # @File           : helper_os.py
 7 | # @Product        : PyCharm
 8 | import commands
 9 | import os
10 | import sys
11 | 
12 | 
def shell():
    """Demonstrate three ways of running a shell command and print their results.

    * ``os.system`` returns only the exit status (output goes to the terminal).
    * ``os.popen`` returns a file-like object from which the output is read.
    * ``commands.getstatusoutput`` returns ``(status, output)``.
    """
    command_ls = 'ls -al /opt'
    command_docker = 'docker ps -a'

    # os.system()
    ros = os.system(command_ls)
    print('\n\nos.system() :  %s' % ros)

    # os.popen()
    pipe = os.popen(command_docker)
    try:
        result = pipe.read()
    finally:
        # Close the pipe so the child process is reaped.
        pipe.close()
    print('\n\nos.popen() :  %s' % result)

    # commands module
    (status, output) = commands.getstatusoutput(command_docker)
    print('\n\ncommands :  %s %s' % (status, output))
29 | 
30 | 
def deep_look_dir(dir_path, deep=1, console_full_path=False):
    """Print the contents of ``dir_path`` recursively as an indented tree.

    :param dir_path: directory to list, e.g. ``os.path.dirname(__file__)``
    :param deep: current nesting level; 1 for the top-level call, which also
        prints ``dir_path`` itself
    :param console_full_path: print full paths instead of bare entry names
    :return: None

    Only real directories are descended into.  Every other entry (regular
    files, broken symbolic links, sockets, ...) is printed as a leaf; such
    entries used to be treated as directories and crashed ``os.listdir``.
    """
    if deep == 1:
        print(dir_path)
        split_symbol = '｜＿'
    else:
        split_symbol = '｜' + '　' * (deep - 1) + '｜＿'

    for f in os.listdir(dir_path):
        f_path = os.path.join(dir_path, f)
        console_name = f_path if console_full_path else f

        if os.path.isdir(f_path):
            print("{sp} {dir_path}/: ".format(
                sp=split_symbol,
                dir_path=console_name))
            deep_look_dir(f_path, deep + 1, console_full_path)
        else:
            print('%s %s' % (split_symbol, console_name))
60 | 
61 | 
if '__main__' == __name__:
   # Demo: print the tree below a hard-coded directory.
   deep_look_dir('/root/pythonStudy')
64 | 


--------------------------------------------------------------------------------
/standard_library/study_regular_expression.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # @Time           : 17-8-11 上午11:56
 4 | # @Author         : Tom.Lee
 5 | # @CopyRight      : 2016-2017 OpenBridge by yihecloud
 6 | # @File           : *regular_expression.py
 7 | # @Product        : PyCharm
 8 | # @docs           : http://www.cnblogs.com/dreamer-fish/p/5282679.html
 9 | 
import re

r"""
Regular-expression notes.

A character class r'[...]' matches exactly ONE character out of the listed
set: [amk] matches 'a', 'm' or 'k'.  Inside the brackets "|" is an ordinary
character; alternatives are written outside a class, e.g. r'foo|bar'.

1. Escape special characters with a backslash: "[" --> "\["
2. Replace text: remove every 1, 2 and 3 with re.compile(r'[123]').sub('', str)
3. Search for single characters with a class: [amk] matches 'a', 'm' or 'k'
"""

# ********************** substitution ***********************
#
#   re.compile(pattern).sub(replacement, string)
#
# ***********************************************************


# 1. Strip the list-repr decoration "[", "]", "u'", "'" and whitespace.
#    Alternation is used instead of a single character class: a class would
#    delete every "u" on its own, including the ones inside a host name.
#    \b keeps "u'" from swallowing the last letter of a longer name that
#    ends in "u".
s = "[u'node-2.domain.tld', u'node-1.domain.tld']"
s1 = re.compile(r"\[|\]|\bu'|'|\s").sub('', s).split(',')
print('%s %s' % (s1, type(s1)))  # ['node-2.domain.tld', 'node-1.domain.tld'] <type 'list'>

# 2. Replace every whitespace character with '--'
print(re.compile(r'\s').sub('--', '1234 56 '))  # 1234--56--

# ********************** searching ***********************
#
#   re.findall(pattern, string) scans the whole string and returns every
#   non-overlapping match.
#   re.S (DOTALL) only changes ".": with it "." also matches a newline.
#   It has no effect on a pattern that contains no ".".
#
# ********************************************************

# 1. Extract the digits of a string
s = '123abc456@#$%^7890'
# one match per digit ("+" inside a class would be a literal plus sign)
print(re.findall(r'\d', s))  # ['1', '2', '3', '4', '5', '6', '7', '8', '9', '0']
# one match per run of digits
print(re.findall(r'\d+', s))  # ['123', '456', '7890']

# 2. Match a CIDR such as 172.16.6.18/24
cidr_text = 'inet 172.16.6.18/24 brd 172.16.6.255 scope global eth0'
cidrs = re.findall(r'(?<![.\d])(?:\d{1,3}\.){3}\d{1,3}/\d{1,2}(?!\d)', cidr_text)
print(cidrs)  # ['172.16.6.18/24']

# 3. Match a uuid such as c6aa9c38-ccee-467f-9a1e-c718a33ecc06
uuid_text = 'instance c6aa9c38-ccee-467f-9a1e-c718a33ecc06 is ACTIVE'
uuids = re.findall(r'[a-f\d]{8}-[a-f\d]{4}-[a-f\d]{4}-[a-f\d]{4}-[a-f\d]{12}', uuid_text)
print(uuids)  # ['c6aa9c38-ccee-467f-9a1e-c718a33ecc06']

# 4. Match the text between single quotes: (?<=').*?(?=')
s = "You are not permitted to modify 'architecture' on this image."
quoted = re.findall(r"(?<=').*?(?=')", s, re.S)
print(quoted)  # ['architecture']
56 | 


--------------------------------------------------------------------------------
/standard_library/study_socket.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # @Time           : 17-4-19 上午10:58
 4 | # @Author         : Tom.Lee
 5 | # @Description    : 
 6 | # @File           : socket.py
 7 | # @Product        : PyCharm
 8 | import socket
 9 | import threading
10 | import time
11 | 
12 | 
class Server(object):
    """Blocking TCP server that answers every connection from its own thread.

    For each accepted connection a worker thread reads one message (at most
    1024 bytes), prints it, sleeps 10 seconds and replies with
    "<message>:<connection number>" before closing the connection.
    """

    # Number of connections accepted so far.
    NUMBER = 0

    def __init__(self, ip, port, message='hello'):
        """
        :param ip: address to bind to
        :param port: TCP port to listen on
        :param message: text sent back to every client, followed by ":<n>"
        """
        self.__ip = ip
        self.__port = port
        self.__message = message

    def start(self):
        """Bind, listen and serve connections in an endless accept loop.

        Does not return normally.  The listening socket is closed when the
        loop is left through an exception (for example KeyboardInterrupt).
        """
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)

        def run(**kwargs):
            client = kwargs.get('client')
            # The connection number is handed over by the accept loop.
            # Reading self.NUMBER here would race with later connections
            # that bump the counter before this thread gets to run.
            num = kwargs.get('num')
            try:
                print('%s %s %s' % (num, client, client.recv(1024)))
                time.sleep(10)
                client.sendall('%s:%d' % (self.__message, num))
            finally:
                # Release the connection even when recv/sendall fails.
                client.close()

        try:
            s.bind((self.__ip, self.__port))
            s.listen(0)
            while True:
                client, _ = s.accept()
                self.NUMBER += 1
                number = self.NUMBER
                threading.Thread(target=run, name='T%d' % number,
                                 kwargs={'client': client, 'num': number}).start()
        finally:
            s.close()
40 | 
41 | 
class Client(object):
    """One-shot TCP client: send a message, print the reply, disconnect."""

    def __init__(self, ip, port, message='hi'):
        """
        :param ip: server address
        :param port: server TCP port
        :param message: payload sent right after connecting
        """
        self.__ip = ip
        self.__port = port
        self.__message = message

    def start(self):
        """Connect, send the message and print up to 1024 bytes of the reply.

        Errors raised by the socket calls propagate to the caller; the
        socket is closed in every case.
        """
        c = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            c.connect((self.__ip, self.__port))
            # sendall() keeps writing until the whole message is out,
            # send() may stop after a part of it.
            c.sendall(self.__message)
            print(c.recv(1024))
        finally:
            c.close()
54 | 
55 | 
if '__main__' == __name__:
    # Serve on the loopback interface, port 1000, until interrupted.
    Server('127.0.0.1', 1000).start()
59 | 


--------------------------------------------------------------------------------
/standard_library/threads/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # @Time    : 2017/4/29 14:43
4 | # @Author  : Aries
5 | # @Site    : 
6 | # @File    : __init__.py.py
7 | # @Software: PyCharm
8 | 
9 | 


--------------------------------------------------------------------------------
/standard_library/threads/demo_consumer_producer.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # @Time    : 2017/4/29 14:44
 4 | # @Author  : tom.lee
 5 | # @Site    : 
 6 | # @File    : consumer_producer.py
 7 | # @Software: PyCharm
 8 | 
 9 | 
10 | import time
11 | import threading
12 | import Queue
13 | 
14 | 
class Consumer(threading.Thread):
    """Worker thread that prints every queue item until it receives 'quit'."""

    def __init__(self, queue):
        """
        :param queue: queue object the messages are taken from; it must
                      provide get() and task_done()
        """
        threading.Thread.__init__(self)
        self._queue = queue

    def run(self):
        """Consume messages until the string 'quit' arrives, then say goodbye."""
        while True:
            msg = self._queue.get()
            try:
                if isinstance(msg, str) and msg == 'quit':
                    break
                print("I'm a thread, and I received %s!!" % msg)
            finally:
                # Every get() needs its task_done(), the 'quit' marker
                # included, otherwise queue.join() would wait forever.
                self._queue.task_done()
        print('Bye byes!')
28 | 
29 | 
def producer(duration=5, interval=1):
    """
    Feed a Consumer thread with timestamped messages, then shut it down.

    :param duration: how long, in seconds, new messages are produced
    :param interval: pause, in seconds, between two messages
    :return: None; returns once the consumer thread has finished
    """
    queue = Queue.Queue()
    worker = Consumer(queue)
    worker.start()
    start_time = time.time()
    while time.time() - start_time < duration:
        queue.put('something at %s' % time.time())
        time.sleep(interval)
    # 'quit' is the marker on which the consumer leaves its loop.
    queue.put('quit')
    worker.join()
41 | 
42 | 
if '__main__' == __name__:
    # Announce the start, then run the producer/consumer demo.
    print('test')
    producer()
46 | 


--------------------------------------------------------------------------------
/standard_library/threads/my_thread_pool.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # @Time    : 2017/4/29 17:06
 4 | # @Author  : Aries
 5 | # @Site    : 
 6 | # @File    : threadPool.py
 7 | # @Software: PyCharm
 8 | 
 9 | import threading
10 | 
11 | 
12 | class _Worker(threading.Thread):
13 |     def __init__(self, queue, func, *args, **kwargs):
14 |         super(_Worker, self).__init__(*args, **kwargs)
15 |         self.__queue = queue
16 |         self.__func = func
17 | 
18 |     def __task(self):
19 |         args = self.__queue.get()
20 |         self.__func(args)
21 |         self.__queue.task_done()
22 | 
23 |     def run(self):
24 |         while True:
25 |             self.__task()
26 | 
27 | 
class ReThreadPool(object):
    """Starts ``num`` _Worker threads that apply ``func`` to the queue items."""

    def __init__(self, queue, func, daemon=False, num=10):
        """
        :param queue: queue holding the items to process
        :param func: callable applied to every item
        :param daemon: when true the workers are marked as daemon threads
        :param num: number of worker threads started by execute()
        """
        self.queue, self.func = queue, func
        self.daemon, self.num = daemon, num

    def execute(self):
        """Start the workers, then block until the queue is fully processed."""
        started = 0
        while started < self.num:
            thread = _Worker(self.queue, self.func)
            if self.daemon:
                thread.daemon = True
            thread.start()
            started += 1
        self.queue.join()
42 | 
43 | 
if __name__ == '__main__':
    import time
    import Queue


    def test(num):
        """Simulated one-second job."""
        time.sleep(1)
        print('num:%d' % num)


    start_time = time.time()
    q = Queue.Queue()
    for i in range(50):
        q.put(i)

    # The workers loop forever; as daemon threads they do not keep the
    # process alive once the demo is over.
    ReThreadPool(q, test, daemon=True).execute()
    # Add new data to the queue: the workers started above pick it up.
    for i in range(50, 100):
        q.put(i)
    # Wait for the second batch too, so that the elapsed time covers it.
    q.join()
    print(time.time() - start_time)
65 | 


--------------------------------------------------------------------------------
/standard_library/threads/rethread.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # @Time    : 17-4-23 下午9:31
 4 | # @Author  : tom.lee
 5 | # @Site    : 重写带退出方法的线程类
 6 | # @File    : rethread.py
 7 | # @Software: PyCharm
 8 | 
 9 | 
10 | import threading
11 | 
12 | 
class ReThread(threading.Thread):
    """Thread base class with a pause flag and a running flag.

    run() does nothing here.  A subclass overrides it with a loop that
    checks ``is_running`` and calls ``wait_if_paused()`` at the points where
    a pause requested through stop() should take effect.
    """

    def __init__(self, *args, **kwargs):
        super(ReThread, self).__init__(*args, **kwargs)
        self.__flag = threading.Event()  # pause flag: set = go on, cleared = paused
        self.__flag.set()
        self.__running = threading.Event()  # running flag: cleared by exit()
        self.__running.set()

    @property
    def is_running(self):
        """
        Running flag.
        :return: True until exit() has been called, False afterwards
        """
        return self.__running.is_set()

    def wait_if_paused(self, timeout=None):
        """
        Block while the thread is paused.

        The pause flag is private to this class, so this method is the way
        for a subclass to honor stop()/resume() inside its run() loop.

        :param timeout: maximum number of seconds to wait, None for no limit
        :return: the result of threading.Event.wait() on the pause flag
        """
        return self.__flag.wait(timeout)

    def run(self):
        """
        To be overridden: loop while self.is_running is true.
        """
        pass

    def stop(self):
        """
        Pause: clear the pause flag, wait_if_paused() blocks from now on.
        """
        self.__flag.clear()

    def resume(self):
        """
        Resume: set the pause flag, wait_if_paused() no longer blocks.
        """
        self.__flag.set()

    def exit(self):
        """
        Ask the thread to finish: the pause flag is set so that a paused
        thread wakes up, and the running flag is cleared.
        """
        self.__flag.set()
        self.__running.clear()
54 | 
if "__main__" == __name__:
    # Smoke run: start a ReThread (its run() does nothing) and print a marker.
    rt = ReThread()
    rt.start()
    print('111')
60 | 


--------------------------------------------------------------------------------
/standard_library/threads/thread_pool_test.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # @Time    : 2017/4/29 14:46
 4 | # @Author  : TOM
 5 | # @Site    : 模拟线程池
 6 | # @File    : thread_pool_test.py
 7 | # @Software: PyCharm
 8 | 
 9 | 
10 | import time
11 | import threading
12 | import Queue
13 | import urllib2
14 | 
15 | 
class Consumer(threading.Thread):
    """Worker thread that opens every URL it takes from the queue."""

    def __init__(self, queue):
        """
        :param queue: queue of URL strings, providing get() and task_done()
        """
        threading.Thread.__init__(self)
        self._queue = queue

    def run(self):
        """Fetch URLs forever; the loop has no exit condition."""
        while True:
            content = self._queue.get()
            print(content)
            try:
                response = urllib2.urlopen(content)
                # Only reachability is checked, so release the connection
                # right away.
                response.close()
                print('%s _______________ok' % content)
            except Exception as e:
                # One unreachable URL must not end the worker.
                print('%s _______________failed: %s' % (content, e))
            finally:
                # Always acknowledge the item, otherwise queue.join() in
                # the producer never returns after a failed request.
                self._queue.task_done()
28 | 
29 | 
def build_worker_pool(queue, size):
    """
    Start ``size`` Consumer threads reading from ``queue``.

    :param queue: queue shared by all consumers
    :param size: number of threads to start
    :return: list of the started Consumer threads
    """
    pool = []
    for _ in range(size):
        consumer = Consumer(queue)
        # Daemon threads do not keep the interpreter alive: the program can
        # exit while the consumers still sit in their endless loop.
        consumer.daemon = True
        consumer.start()
        pool.append(consumer)
    return pool
40 | 
41 | 
def Producer():
    """Queue a fixed list of URLs for a pool of 4 consumers and time the run."""
    targets = [
        'http://www.python.org',
        'http://www.python.org/about/',
        'http://www.onlamp.com/pub/a/python/2003/04/17/metaclasses.html',
        'http://www.python.org/doc/',
        'http://www.python.org/download/',
        'http://www.python.org/getit/',
        'http://www.python.org/community/',
        'https://wiki.python.org/moin/',
        'http://planet.python.org/',
        'https://wiki.python.org/moin/LocalUserGroups',
        'http://www.python.org/psf/',
        'http://docs.python.org/devguide/',
        'http://www.python.org/community/awards/',
    ]
    began = time.time()
    jobs = Queue.Queue()
    build_worker_pool(jobs, 4)
    for target in targets:
        jobs.put(target)

    # Block until every queued URL has been acknowledged with task_done().
    jobs.join()
    print('time use : %s' % (time.time() - began))
67 | 
68 | 
69 | 
70 | 
if '__main__' == __name__:
    # Run the URL-fetching demo when executed as a script.
    Producer()
73 | 


--------------------------------------------------------------------------------
/standard_library/threads/thread_pool_test2.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | # @Time    : 2017/4/29 16:50
 4 | # @Author  : Aries
 5 | # @Site    : 
 6 | # @File    : thread_pool_test2.py
 7 | # @Software: PyCharm
 8 | 
 9 | 
10 | import Queue
11 | import threading
12 | import urllib2
13 | import time
14 | 
15 | 
class ThreadUrl(threading.Thread):
    """Threaded Url Grab: opens every host URL it takes from the queue."""

    def __init__(self, queue):
        """
        :param queue: queue of URL strings, providing get() and task_done()
        """
        threading.Thread.__init__(self)
        self.queue = queue

    def run(self):
        """Fetch URLs forever; the loop has no exit condition."""
        while True:
            # Take the next host from the queue.
            host = self.queue.get()
            print('%s **************************' % host)
            try:
                # Open the URL; the body is not read, so release the
                # connection right away.
                url = urllib2.urlopen(host)
                url.close()
            except Exception as e:
                # One unreachable host must not end the worker.
                print('%s failed: %s' % (host, e))
            finally:
                # Signal the queue that this item is finished, also after a
                # failed request, otherwise queue.join() never returns.
                self.queue.task_done()
35 | 
36 | 
# URLs fetched by the worker threads.
hosts = [
    "http://www.python.org",
    "http://www.python.org/about/",
    "http://www.onlamp.com/pub/a/python/2003/04/17/metaclasses.html",
    "http://www.python.org/doc/",
    "http://www.python.org/download/",
    "http://www.python.org/getit/",
    "http://www.python.org/community/",
    "https://wiki.python.org/moin/",
    "http://planet.python.org/",
    "https://wiki.python.org/moin/LocalUserGroups",
    "http://www.python.org/psf/",
    "http://docs.python.org/devguide/",
    "http://www.python.org/community/awards/",
]
52 | 
53 | 
def main():
    """Fetch every URL in ``hosts`` with 4 daemon threads and print the time."""
    began = time.time()
    # One queue shared by all worker threads.
    jobs = Queue.Queue()
    # Build the pool and hand the queue to every thread.
    for _ in range(4):
        fetcher = ThreadUrl(jobs)
        # Daemon threads do not keep the interpreter alive: the program can
        # exit although the workers never leave their loop.
        fetcher.daemon = True
        fetcher.start()
    # Fill the queue with the work items.
    for host in hosts:
        jobs.put(host)
    # Wait until every item has been acknowledged with task_done().
    jobs.join()
    print("Elapsed Time: %s" % (time.time() - began))
71 | 
72 | 
# Run the demo only when executed as a script, so that importing this
# module does not start any network request.
if __name__ == '__main__':
    main()
74 | 
75 | 


--------------------------------------------------------------------------------
/use_package.md:
--------------------------------------------------------------------------------
 1 | # 如何正确的使用python package
 2 | 一般来说，在同一个包内使用其他模块的函数时，只需要`import 模块名称`，
 3 | 然后通过`模块名称.函数名称`来调用这个函数，或者使用`from 模块名 import 函数名称`直接导入该函数：
 4 | ```python
 5 | import model
 6 | 
 7 | print model.add(1,2)
 8 | 
 9 | """
10 | 或者使用该方式，效果相同
11 | 
12 | from model import add
13 | print add(1,2)
14 | """
15 | ```
16 | 
17 | 当引用其他包中的模块时，一般使用`from 包名 import 模块名`，或者直接使用 from 导入函数：`from 包名.模块名 import 函数名称`，然后调用这个函数
18 | ```python
19 | from test import db2
20 | print db2.delete()
21 | 
22 | """
23 | 或者使用该方式，效果相同
24 | 
25 | from test.db2 import delete
26 | print delete()
27 | """
28 | ```
29 | 以上的方式完全可以解决问题。不过我们已经知道，想要引用其他包中的 python 模块，该模块所在的包必须包含一个名为`__init__.py`的文件，
30 | 这个文件用来标志该文件夹是一个 python 包。我们可以在`__init__.py`中使用`from 包名 import 模块名, ...`的方式，把包里的模块导入进来；
31 | 这样在其他位置使用该包的模块时，只需要导入包名即可，如：`import test`，然后通过`包名.模块名.函数名`来调用模块中的函数。虽然效果是一样的，
32 | 但是这样会让整个项目的模块结构更加清晰，例如：
33 | ```
34 | .
35 | ├── app.py
36 | └── celerys
37 |     ├── __init__.py
38 |     └── tasks.py
39 | 
40 | 1 directory, 3 files
41 | 
42 | ```
43 | \_\_init\_\_.py
44 | ```python
45 | """
46 | import models
47 | """
48 | 
49 | from celerys import tasks
50 | ```
51 | tasks.py
52 | ```python
53 | #! /bin/python
54 | 
55 | from celery import Celery
56 | 
57 | broker = 'redis://127.0.0.1:6379/5'
58 | backend = 'redis://127.0.0.1:6379/6'
59 | 
60 | app = Celery('tasks', broker=broker, backend=backend)
61 | 
62 | @app.task
63 | def add(x, y):
64 |     return x + y
65 | ```
66 | app.py
67 | ```python
68 | import celerys
69 | 
70 | if __name__ == '__main__':
71 |     print celerys.tasks.add(1, 2)
72 | 
73 | ```
74 | 


--------------------------------------------------------------------------------