├── python_utils ├── matplot │ ├── __init__.py │ ├── plot_accuracy_loss.py │ ├── basic.py │ └── plot_many.py ├── utils │ ├── __init__.py │ └── JsonUtil.py ├── DbService │ ├── __init__.py │ ├── mysql_db │ │ ├── __init__.py │ │ ├── DbBase.py │ │ └── DbSubService.py │ ├── redis_db │ │ └── __init__.py │ ├── sqlalchemy │ │ ├── __init__.py │ │ ├── orm.py │ │ └── basic.py │ └── config │ │ └── mysql_config.json ├── al_lt_common │ ├── __init__.py │ ├── al_str.py │ └── al_cv.py ├── distributed │ ├── __init__.py │ └── zookeeper_demo │ │ ├── __init__.py │ │ ├── zk_failover_monitor.py │ │ ├── zk_watch.py │ │ ├── zk_failover_worker.py │ │ ├── zk_lock_demo.py │ │ ├── zk_master_select.py │ │ └── zk_node_ope.py ├── http_basic │ ├── __init__.py │ ├── flask_web │ │ ├── __init__.py │ │ ├── flask_restful.py │ │ ├── flask_resp.py │ │ ├── flask_file_svr.py │ │ ├── flask_auth.py │ │ ├── flask_error_handler.py │ │ ├── flask_basic_web.py │ │ └── flask_content_type.py │ ├── http_realize │ │ ├── __init__.py │ │ ├── http_realize_1 │ │ │ ├── __init__.py │ │ │ └── http_server_1.py │ │ ├── static_server │ │ │ ├── __init__.py │ │ │ ├── plain.html │ │ │ └── static_server.py │ │ ├── test_SimpleHTTPServer.py │ │ ├── http_svr_simple.py │ │ └── http_svr_basic_1.py │ ├── simple_rpc │ │ ├── __init__.py │ │ ├── rpc_client_1.py │ │ └── rpc_server_1.py │ ├── socket_basic │ │ ├── __init__.py │ │ ├── udp_sock │ │ │ ├── __init__.py │ │ │ ├── udp_client.py │ │ │ └── udp_server.py │ │ ├── SocketServer_basic.py │ │ └── basic_client.py │ ├── wsgi_demo │ │ ├── __init__.py │ │ └── wsgi_demo.py │ ├── http_client_get.py │ └── url_ope.py ├── machine_learn │ ├── __init__.py │ ├── PCA │ │ ├── __init__.py │ │ └── pca_basic.py │ ├── Bayes │ │ ├── __init__.py │ │ └── bayes_sklearn.py │ ├── cluster │ │ ├── __init__.py │ │ └── sk_cluster.py │ ├── knearest │ │ ├── __init__.py │ │ ├── knn_scratch.py │ │ └── knn_classify_sklearn.py │ ├── decision_tree │ │ ├── __init__.py │ │ ├── dt.png │ │ ├── tree.dot │ │ ├── create_data.py │ │ 
├── dtree_scratch.py │ │ └── dtree_sklearn.py │ ├── perception │ │ ├── __init__.py │ │ └── perception.py │ ├── linear_regression │ │ ├── __init__.py │ │ └── sk_example.py │ ├── logistic_regression │ │ ├── __init__.py │ │ ├── lr_sklearn_v1.py │ │ └── lr_scratch.py │ ├── neural_network_keras │ │ ├── __init__.py │ │ ├── lstm_nlp.py │ │ ├── cnn_keras_digits.py │ │ └── nn_keras_digits.py │ └── dataset │ │ ├── decision_tree │ │ └── data_banknote_authentication.txt │ │ ├── cluster │ │ └── cluster_txt │ │ ├── perception │ │ └── dataset.txt │ │ └── logistic_regression │ │ └── lr_ml_action.txt ├── netsocket │ ├── __init__.py │ ├── basic_socket.py │ └── ip_int.py ├── numpy_operate │ ├── __init__.py │ ├── structured_arr.py │ ├── arr_vectorize.py │ ├── flip_arr.py │ ├── zero_one_empty.py │ ├── broadcast_demo.py │ ├── array_multiply.py │ ├── np_distance.py │ ├── arr_equal_close.py │ ├── arr_sort.py │ ├── algebra_op.py │ ├── log2_op.py │ ├── idx_arrays.py │ ├── random_arr.py │ └── array_create.py ├── opencv_basic │ ├── __init__.py │ ├── path_var.py │ ├── cv_basic_op.py │ └── url_img_cv.py ├── thread_process │ ├── __init__.py │ ├── basic_process.py │ ├── sema_thread.py │ ├── basic_thread.py │ ├── thread_timer.py │ ├── multitread_profile.py │ ├── thread_condition.py │ ├── pool_dummy.py │ ├── pool_queue.py │ ├── thread_lock.py │ └── thread_queue.py ├── document │ ├── machine_learn │ │ ├── knearest │ │ │ └── README.md │ │ └── percepton │ │ │ ├── perception_plot.jpg │ │ │ ├── perception_ret.jpg │ │ │ └── README.md │ └── numpy_operate │ │ └── README.md ├── py_basic │ ├── __init__.py │ ├── arg_parse.py │ ├── MD5_sha.py │ ├── log_config.py │ ├── global_val.py │ ├── profile_ope.py │ ├── with_usage.py │ ├── collection_ope.py │ ├── except_ope.py │ ├── argparse_ope.py │ ├── tuple_operate.py │ ├── num_ope.py │ ├── argv_basic.py │ ├── decorator_basic.py │ ├── yield_ope.py │ ├── random_operator.py │ ├── decorator_set.py │ ├── dw_img_from_google.py │ ├── calendar_ope.py │ ├── operator_ope.py │ 
├── obj_is.py │ ├── base64_test.py │ ├── functional_program.py │ ├── kwargs_xargs.py │ ├── str_basic.py │ ├── set_ope.py │ └── time_ope.py └── sk_sc_pd_operator │ ├── __init__.py │ ├── sc_distance_ope.py │ ├── pd_str.py │ ├── sk_KFlod.py │ ├── pd_plot.py │ ├── sk_metric_accuracy.py │ ├── pd_feature2value.py │ ├── split_train_test_data.py │ ├── pd_index.py │ ├── pd_visualize_diamond.py │ ├── pd_concat_join.py │ ├── pd_GridSearchCV.py │ ├── pd_ope.py │ ├── pd_dummy_val.py │ ├── pd_date_time.py │ ├── pd_pivot.py │ ├── pd_miss_data.py │ └── sk_feature_process.py └── README.md /python_utils/matplot/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python_utils/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python_utils/DbService/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python_utils/al_lt_common/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python_utils/distributed/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python_utils/http_basic/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python_utils/machine_learn/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /python_utils/netsocket/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python_utils/numpy_operate/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python_utils/opencv_basic/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python_utils/DbService/mysql_db/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python_utils/DbService/redis_db/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python_utils/machine_learn/PCA/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python_utils/opencv_basic/path_var.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /python_utils/thread_process/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python_utils/DbService/sqlalchemy/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 
/python_utils/http_basic/flask_web/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python_utils/http_basic/http_realize/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python_utils/http_basic/simple_rpc/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python_utils/http_basic/socket_basic/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python_utils/http_basic/wsgi_demo/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python_utils/machine_learn/Bayes/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python_utils/machine_learn/cluster/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python_utils/machine_learn/knearest/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python_utils/distributed/zookeeper_demo/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python_utils/machine_learn/decision_tree/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python_utils/machine_learn/perception/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python_utils/document/machine_learn/knearest/README.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python_utils/http_basic/socket_basic/udp_sock/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python_utils/machine_learn/linear_regression/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python_utils/machine_learn/logistic_regression/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python_utils/http_basic/http_realize/http_realize_1/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python_utils/http_basic/http_realize/static_server/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python_utils/machine_learn/neural_network_keras/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 
/python_utils/machine_learn/dataset/decision_tree/data_banknote_authentication.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python_utils/py_basic/__init__.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | 3 | if __name__ == '__main__': 4 | 5 | pass 6 | -------------------------------------------------------------------------------- /python_utils/sk_sc_pd_operator/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | This package is common usage about sklearn, scipy, pandas library 3 | """ -------------------------------------------------------------------------------- /python_utils/DbService/config/mysql_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "host":"localhost","user":"root","pwd":"123456", 3 | "db":"springdemo","port":3306 4 | } -------------------------------------------------------------------------------- /python_utils/opencv_basic/cv_basic_op.py: -------------------------------------------------------------------------------- 1 | # _*_coding:utf-8 _*_ 2 | 3 | """ 4 | This file is basic operator about cv2 5 | """ 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /python_utils/machine_learn/decision_tree/dt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jayhello/python_utils/HEAD/python_utils/machine_learn/decision_tree/dt.png -------------------------------------------------------------------------------- /python_utils/document/machine_learn/percepton/perception_plot.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Jayhello/python_utils/HEAD/python_utils/document/machine_learn/percepton/perception_plot.jpg -------------------------------------------------------------------------------- /python_utils/document/machine_learn/percepton/perception_ret.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jayhello/python_utils/HEAD/python_utils/document/machine_learn/percepton/perception_ret.jpg -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # python_utils 2 | 1.This project is my common use/record of python. For some utils if we don't use for a long time,we may forget it. 3 | so I upload it to github.When I need to use some utils,I can get it directily from this project avoiding research form 4 | google. 5 | -------------------------------------------------------------------------------- /python_utils/http_basic/http_realize/static_server/plain.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Plain Page 6 | 7 | 8 |

Plain Page

9 |

Nothin' but HTML.

10 | 11 | -------------------------------------------------------------------------------- /python_utils/sk_sc_pd_operator/sc_distance_ope.py: -------------------------------------------------------------------------------- 1 | # _*_ coding:utf-8 _*_ 2 | 3 | from scipy.spatial import distance 4 | import numpy as np 5 | 6 | 7 | def eu_distance(): 8 | a1 = np.array([1, 2, 3]) 9 | a2 = np.array([3, 4, 5]) 10 | print distance.euclidean(a1, a2) 11 | # 3.46410161514 12 | 13 | if __name__ == '__main__': 14 | eu_distance() 15 | pass 16 | -------------------------------------------------------------------------------- /python_utils/document/numpy_operate/README.md: -------------------------------------------------------------------------------- 1 | sometimes you want numbered lists 2 | 3 | 1. one 4 | 2. two 5 | 6 | sometimes you want bullet points 7 | 8 | * start a line with a start 9 | * profit! 10 | 11 | Alternatively, 12 | 13 | - Dashes work just as well 14 | - And if you have sub points, put two spaces before the dash or star: 15 | - Like this 16 | - And this 17 | * hello world! 18 | -------------------------------------------------------------------------------- /python_utils/document/machine_learn/percepton/README.md: -------------------------------------------------------------------------------- 1 | ## The running result images 2 | 1. ![console result](https://github.com/Jayhello/python_utils/blob/master/python_utils/document/machine_learn/percepton/perception_ret.jpg) 3 | 2. 
![plot result](https://github.com/Jayhello/python_utils/blob/master/python_utils/document/machine_learn/percepton/perception_plot.jpg) 4 | 5 | ## Example of realize perception 6 | -------------------------------------------------------------------------------- /python_utils/thread_process/basic_process.py: -------------------------------------------------------------------------------- 1 | # _*_ coding:utf-8 _*_ 2 | 3 | import os 4 | import multiprocessing 5 | 6 | 7 | def get_process_id(): 8 | print os.getpid() 9 | # 8844 10 | print multiprocessing.current_process().pid 11 | # 8844 12 | print multiprocessing.current_process().name 13 | # MainProcess 14 | 15 | 16 | if __name__ == '__main__': 17 | get_process_id() 18 | -------------------------------------------------------------------------------- /python_utils/py_basic/arg_parse.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file shows basic usage of argparse. 3 | """ 4 | 5 | import argparse 6 | 7 | ap = argparse.ArgumentParser() 8 | ap.add_argument('-i', '--image', required=True, help='path to image file') 9 | ap.add_argument('-w', '--weights', default='./cnn_weights.dat', 10 | help='path to weights file') 11 | 12 | args = ap.parse_args() 13 | print args.image, args.weights 14 | -------------------------------------------------------------------------------- /python_utils/py_basic/MD5_sha.py: -------------------------------------------------------------------------------- 1 | # _*_ coding:utf-8 _*_ 2 | import md5 3 | import sha 4 | import hashlib 5 | 6 | 7 | def test_md5(): 8 | content = 'hello xy, are you ok?' 
9 | print hashlib.md5(content).hexdigest() 10 | # 180d5f07d511b660f320cf2a645f1f3b 11 | print hashlib.sha1(content).hexdigest() 12 | # c25884a4688c8b1a25a619f198f91f8661b2623b 13 | 14 | 15 | if __name__ == '__main__': 16 | test_md5() 17 | pass 18 | -------------------------------------------------------------------------------- /python_utils/http_basic/flask_web/flask_restful.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | 3 | from flask import Flask 4 | from flask import jsonify 5 | 6 | 7 | app = Flask(__name__) 8 | 9 | 10 | @app.route('/') 11 | def index(): 12 | return "hello world" 13 | 14 | 15 | @app.route('/idx') 16 | def index_js(): 17 | d = {"k": "hello world"} 18 | return jsonify(d) 19 | 20 | 21 | if __name__ == '__main__': 22 | app.run(host='0.0.0.0') 23 | pass 24 | -------------------------------------------------------------------------------- /python_utils/py_basic/log_config.py: -------------------------------------------------------------------------------- 1 | # _*_ coding:utf-8 _*_ 2 | import logging 3 | 4 | # log config, module name, line num, function name 5 | logging.basicConfig( 6 | format="%(asctime)s %(levelname)s %(module)s:%(lineno)s %(funcName)s %(threadName)s %(message)s", 7 | level=logging.DEBUG, 8 | datefmt='%Y-%m-%d %I:%M:%S' 9 | ) 10 | 11 | 12 | def test_log(): 13 | logging.info('hello world') 14 | 15 | if __name__ == '__main__': 16 | test_log() 17 | pass 18 | -------------------------------------------------------------------------------- /python_utils/py_basic/global_val.py: -------------------------------------------------------------------------------- 1 | # _*_coding: utf-8 _*_ 2 | """ 3 | test for global variable 4 | note that in multiprocess, every process has it's own 5 | global variable, so if the function fun2 is in subprocess 6 | it will still be 0 7 | """ 8 | g_dst_dir = '' 9 | 10 | g_val = 0 11 | 12 | 13 | def fun2(): 14 | print g_val 15 | 16 | 17 | def fun1(): 18 
| global g_val 19 | g_val = 3 20 | fun2() 21 | 22 | 23 | if __name__ == '__main__': 24 | fun1() 25 | pass 26 | -------------------------------------------------------------------------------- /python_utils/http_basic/http_realize/test_SimpleHTTPServer.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | 3 | import SimpleHTTPServer 4 | 5 | 6 | def test_translate_path(): 7 | url = "http://yy.com/ai/xy/" 8 | handler = SimpleHTTPServer.SimpleHTTPRequestHandler(None, None, None) 9 | 10 | print handler.translate_path(url) 11 | 12 | 13 | if __name__ == '__main__': 14 | 15 | # ----- test translate path ----- 16 | if 1: 17 | test_translate_path() 18 | # ----- end ----- 19 | 20 | pass 21 | -------------------------------------------------------------------------------- /python_utils/numpy_operate/structured_arr.py: -------------------------------------------------------------------------------- 1 | # _*_ coding:utf-8 _*_ 2 | import numpy as np 3 | 4 | 5 | def create_structured_arr(): 6 | dtype = [('name', 'S10'), ('height', float), ('age', int)] 7 | arr_val = [('Arthur', 1.8, 41), ('Lancelot', 1.9, 38), 8 | ('Galahad', 1.7, 38)] 9 | 10 | arr = np.array(arr_val, dtype=dtype) 11 | print np.sort(arr, order='height') 12 | pass 13 | 14 | 15 | if __name__ == '__main__': 16 | create_structured_arr() 17 | pass 18 | -------------------------------------------------------------------------------- /python_utils/distributed/zookeeper_demo/zk_failover_monitor.py: -------------------------------------------------------------------------------- 1 | from kazoo.client import KazooClient 2 | 3 | import time 4 | 5 | import logging 6 | logging.basicConfig() 7 | 8 | zk = KazooClient(hosts='127.0.0.1:2181') 9 | zk.start() 10 | 11 | # Determine if a node exists 12 | while True: 13 | if zk.exists("/test/failure_detection/worker"): 14 | print "the worker is alive!" 15 | else: 16 | print "the worker is dead!" 
17 | break 18 | time.sleep(3) 19 | 20 | zk.stop() 21 | -------------------------------------------------------------------------------- /python_utils/distributed/zookeeper_demo/zk_watch.py: -------------------------------------------------------------------------------- 1 | from kazoo.client import KazooClient 2 | import time 3 | 4 | import logging 5 | logging.basicConfig() 6 | 7 | zk = KazooClient(hosts='127.0.0.1:2181') 8 | zk.start() 9 | 10 | 11 | @zk.DataWatch('/test/zk1/node') 12 | def my_func(data, stat): 13 | if data: 14 | print "Data is %s" % data 15 | print "Version is %s" % stat.version 16 | else: 17 | print "data is not available" 18 | 19 | while True: 20 | time.sleep(10) 21 | 22 | zk.stop() 23 | -------------------------------------------------------------------------------- /python_utils/thread_process/sema_thread.py: -------------------------------------------------------------------------------- 1 | import time 2 | from random import random 3 | from threading import Thread, Semaphore 4 | 5 | sema = Semaphore(3) 6 | 7 | 8 | def foo(tid): 9 | with sema: 10 | print '{} acquire sema'.format(tid) 11 | wt = random() * 2 12 | time.sleep(wt) 13 | print '{} release sema'.format(tid) 14 | 15 | 16 | threads = [] 17 | for i in range(5): 18 | t = Thread(target=foo, args=(i,)) 19 | threads.append(t) 20 | t.start() 21 | for t in threads: 22 | t.join() 23 | -------------------------------------------------------------------------------- /python_utils/http_basic/simple_rpc/rpc_client_1.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | 3 | import xmlrpclib 4 | 5 | 6 | def test_client_1(): 7 | host = "http://localhost:8888/" 8 | proxy = xmlrpclib.ServerProxy(host) 9 | print "using proxy %s" % proxy 10 | 11 | print "3 is even %s" % str(proxy.is_even(3)) 12 | print "100 is even %s" % str(proxy.is_even(100)) 13 | 14 | 15 | if __name__ == '__main__': 16 | 17 | # ----- test simple rpv client ----- 18 | if 1: 19 | 
test_client_1() 20 | # ----- end ----- 21 | 22 | pass 23 | -------------------------------------------------------------------------------- /python_utils/distributed/zookeeper_demo/zk_failover_worker.py: -------------------------------------------------------------------------------- 1 | from kazoo.client import KazooClient 2 | import time 3 | 4 | import logging 5 | logging.basicConfig() 6 | 7 | zk = KazooClient(hosts='127.0.0.1:2181') 8 | zk.start() 9 | 10 | # Ensure a path, create if necessary 11 | zk.ensure_path("/test/failure_detection") 12 | 13 | # Create a node with data 14 | zk.create("/test/failure_detection/worker", 15 | value=b"a test value", ephemeral=True) 16 | 17 | while True: 18 | print "I am alive!" 19 | time.sleep(3) 20 | 21 | zk.stop() 22 | -------------------------------------------------------------------------------- /python_utils/py_basic/profile_ope.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | 3 | import cProfile 4 | 5 | 6 | def func1(): 7 | sum = 0 8 | for i in range(1000000): 9 | sum += i 10 | 11 | # 1 0.167 0.167 0.167 0.167 {range} 12 | # 4 function calls in 0.674 seconds 13 | 14 | 15 | def func2(): 16 | sum = 0 17 | for i in xrange(1000000): 18 | sum += i 19 | 20 | # 3 function calls in 0.350 seconds 21 | 22 | if __name__ == '__main__': 23 | # cProfile.run("func1()") 24 | cProfile.run("func2()") 25 | pass 26 | -------------------------------------------------------------------------------- /python_utils/http_basic/flask_web/flask_resp.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | 3 | from flask import Flask 4 | from flask import Response 5 | import json 6 | 7 | 8 | app = Flask(__name__) 9 | 10 | 11 | @app.route('/hello', methods=['GET']) 12 | def api_hello(): 13 | data = {'name': 'xy', 'greet': "hello"} 14 | js_str = json.dumps(data) 15 | 16 | resp = Response(js_str, status=200, mimetype='application/json') 17 | 
resp.headers['Link'] = 'http://xy.com' 18 | 19 | return resp 20 | 21 | 22 | if __name__ == '__main__': 23 | app.run(host='0.0.0.0') 24 | pass 25 | -------------------------------------------------------------------------------- /python_utils/py_basic/with_usage.py: -------------------------------------------------------------------------------- 1 | # _*_ coding:utf-8 _*_ 2 | 3 | 4 | class Sample(object): 5 | def __enter__(self): 6 | print 'in __enter__' 7 | return "Foo" 8 | 9 | def __exit__(self, exc_type, exc_val, exc_tb): 10 | print 'in __exit__' 11 | 12 | 13 | def get_sample(): 14 | return Sample() 15 | 16 | 17 | def test_with(): 18 | with get_sample() as sp: 19 | print 'Sample: ', sp 20 | 21 | # in __enter__ 22 | # Sample: Foo 23 | # in __exit__ 24 | 25 | if __name__ == '__main__': 26 | test_with() 27 | pass 28 | -------------------------------------------------------------------------------- /python_utils/netsocket/basic_socket.py: -------------------------------------------------------------------------------- 1 | # _*_ coding:utf-8 _*_ 2 | """ 3 | This file is about some basic operator of socket 4 | """ 5 | 6 | 7 | import socket 8 | 9 | 10 | def get_ip(): 11 | """ 12 | local host ip not 127.0.0.1 13 | socket.gethostbyname(socket.gethostname()) will return 127.0.0.1 14 | :return: 15 | """ 16 | s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) 17 | s.connect(("gmail.com", 80)) 18 | # s.getsockname() -> ip:port 19 | print s.getsockname()[0] 20 | s.close() 21 | 22 | 23 | if __name__ == '__main__': 24 | get_ip() 25 | -------------------------------------------------------------------------------- /python_utils/py_basic/collection_ope.py: -------------------------------------------------------------------------------- 1 | # _*_ coding:utf-8 _*_ 2 | 3 | """ 4 | operator about collection 5 | """ 6 | 7 | from collections import Counter 8 | 9 | 10 | def counter_usage(): 11 | lst = ['class_1', 'class_2', 'class_1', 'class_1', 'class_1', 'class_2'] 12 | 13 | print 
Counter(lst).most_common() 14 | # [('class_1', 4), ('class_2', 2)] 15 | 16 | print Counter(lst).most_common(1) 17 | # [('class_1', 4)] 18 | 19 | print Counter(lst).most_common(1)[0][0] 20 | # class_1 21 | 22 | 23 | if __name__ == '__main__': 24 | counter_usage() 25 | pass 26 | -------------------------------------------------------------------------------- /python_utils/py_basic/except_ope.py: -------------------------------------------------------------------------------- 1 | # _*_ coding:utf-8 _*_ 2 | 3 | 4 | def test_except_scope(): 5 | try: 6 | v = 'test scope' 7 | raise Exception 8 | except Exception as e: 9 | # if v is locals(): 10 | # print v 11 | print v 12 | 13 | 14 | def test_except(): 15 | try: 16 | raise 7 17 | except Exception as e: 18 | print e 19 | # exceptions must be old-style classes or derived from BaseException, not int 20 | 21 | 22 | if __name__ == '__main__': 23 | # test_except_scope() 24 | test_except() 25 | pass 26 | -------------------------------------------------------------------------------- /python_utils/http_basic/simple_rpc/rpc_server_1.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from SimpleXMLRPCServer import SimpleXMLRPCServer 4 | 5 | 6 | def is_even(n): 7 | return n % 2 8 | 9 | 10 | def test_server_1(): 11 | port = 8888 12 | rpc_server = SimpleXMLRPCServer(("localhost", port)) 13 | print 'now listening in %s' % port 14 | 15 | rpc_server.register_function(is_even, "is_even") 16 | rpc_server.serve_forever() 17 | 18 | 19 | if __name__ == '__main__': 20 | 21 | # -----test rpc server 1----- 22 | if 1: 23 | test_server_1() 24 | # ----- end ----- 25 | 26 | pass 27 | -------------------------------------------------------------------------------- /python_utils/distributed/zookeeper_demo/zk_lock_demo.py: -------------------------------------------------------------------------------- 1 | # _*_ coding:utf-8 _*_ 2 | 3 | from kazoo.client import KazooClient 4 | import time 5 
| import uuid 6 | import logging 7 | logging.basicConfig() 8 | 9 | my_id = uuid.uuid4() 10 | 11 | 12 | def work(): 13 | print "{} is working! ".format(str(my_id)) 14 | 15 | 16 | zk = KazooClient(hosts='127.0.0.1:2181') 17 | zk.start() 18 | 19 | lock = zk.Lock("/lockpath", str(my_id)) 20 | 21 | print "I am {}".format(str(my_id)) 22 | 23 | while True: 24 | with lock: 25 | work() 26 | time.sleep(3) 27 | 28 | zk.stop() 29 | 30 | if __name__ == '__main__': 31 | 32 | pass 33 | -------------------------------------------------------------------------------- /python_utils/distributed/zookeeper_demo/zk_master_select.py: -------------------------------------------------------------------------------- 1 | from kazoo.client import KazooClient 2 | import time 3 | import uuid 4 | 5 | import logging 6 | logging.basicConfig() 7 | 8 | my_id = uuid.uuid4() 9 | 10 | 11 | def leader_func(): 12 | print "I am the leader {}".format(str(my_id)) 13 | while True: 14 | print "{} is working! ".format(str(my_id)) 15 | time.sleep(3) 16 | 17 | zk = KazooClient(hosts='127.0.0.1:2181') 18 | zk.start() 19 | 20 | election = zk.Election("/electionpath") 21 | 22 | # blocks until the election is won, then calls 23 | # leader_func() 24 | election.run(leader_func) 25 | 26 | zk.stop() 27 | -------------------------------------------------------------------------------- /python_utils/sk_sc_pd_operator/pd_str.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | 3 | import numpy as np 4 | import pandas as pd 5 | 6 | 7 | def series_str(): 8 | data = ['peter', 'Paul', None, 'MARY', 'gUIDO'] 9 | names = pd.Series(data) 10 | print names.str.capitalize() 11 | # 0 Peter 12 | # 1 Paul 13 | # 2 None 14 | # 3 Mary 15 | # 4 Guido 16 | # dtype: object 17 | 18 | print names.str.startswith('p') 19 | # 0 True 20 | # 1 False 21 | # 2 None 22 | # 3 False 23 | # 4 False 24 | # dtype: object 25 | 26 | if __name__ == '__main__': 27 | series_str() 28 | pass 29 | 
def udp_client():
    """Send one maximum-size UDP datagram to a local echo server and
    print how many bytes come back.

    Performs real network I/O: requires udp_server.py listening on
    localhost:8888.

    Fixes: the original built ``'a' * 1024 * 65`` (66560 bytes) and
    immediately overwrote it -- that dead store is removed.  The payload
    is now an explicit bytes literal (required by Python 3 sendto(), and
    identical to the Python 2 str), and prints use the function form.
    """
    host, port = "localhost", 8888

    sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    # 65507 is the largest UDP payload over IPv4:
    # 65535 (IP total length) - 20 (IP header) - 8 (UDP header).
    msg = b'a' * 65507

    sock.sendto(msg, (host, port))

    data, server_add = sock.recvfrom(1024 * 64)
    print('rcv from %s: %s' % (server_add, len(data)))
def my_fun(a, b):
    """Return a - b when a >= b, otherwise a + b (scalar helper for the
    np.vectorize demo below)."""
    if a >= b:
        return a - b
    else:
        return a + b


def test_vectorize():
    """Demo np.vectorize: apply a scalar Python function elementwise
    over an array, with broadcasting of the scalar second argument.

    Python 3 fix: print statements replaced with the print() function.
    """
    v_fun = np.vectorize(my_fun)
    arr = np.arange(8).reshape(2, 4)
    print(arr)
    # [[0 1 2 3]
    #  [4 5 6 7]]
    print(v_fun(arr, 4))
    # [[4 5 6 7]
    #  [0 1 2 3]]

    squarer = lambda t: t ** 2
    v_fun = np.vectorize(squarer)
    print(v_fun(arr))
    # [[ 0  1  4  9]
    #  [16 25 36 49]]
def flip_arr():
    """Demo of numpy array reversal: fliplr / flipud and their
    equivalent slice spellings.

    Python 3 fix: print statements replaced with the print() function.
    """
    arr = np.arange(6).reshape(2, 3)
    print(arr)
    # [[0 1 2]
    #  [3 4 5]]
    print(np.fliplr(arr))       # reverse column order
    # [[2 1 0]
    #  [5 4 3]]
    print(arr[:, ::-1])         # same thing via slicing
    # [[2 1 0]
    #  [5 4 3]]

    print(np.flipud(arr))       # reverse row order
    # [[3 4 5]
    #  [0 1 2]]
    print(arr[::-1])            # same thing via slicing
    # [[3 4 5]
    #  [0 1 2]]

    arr2 = np.arange(8).reshape((2, 2, 2))
    print(arr2)
    # [[[0 1]
    #   [2 3]]
    #
    #  [[4 5]
    #   [6 7]]]
def get_mean():
    """Demo of numpy mean along rows, columns, and the whole matrix.

    Python 3 fix: print statements replaced with the print() function.
    """
    mt = np.array([[3, 1], [-1, 3]])
    m_1 = np.mean(mt[0, :])   # row 0 mean -> 2.0
    m_2 = np.mean(mt[1, :])   # row 1 mean -> 1.0
    m = np.mean(mt)           # grand mean over all elements -> 1.5
    print(m_1)
    print(m_2)
    print(m)

    c_1 = np.mean(mt[:, 0])   # column 0 mean -> 1.0
    c_2 = np.mean(mt[:, 1])   # column 1 mean -> 2.0
    print(c_1)
    print(c_2)
    m = np.mean(mt, axis=0)   # per-column means -> [1. 2.]
    # m = np.mean(mt, axis=(0, 1))
    print(m)

Hello World

12 | 13 | 14 | ''' 15 | 16 | def do_GET(self): 17 | self.send_response(200) 18 | self.send_header("Content-type", "text/html") 19 | self.send_header("Content-Length", str(len(self.Page))) 20 | self.end_headers() 21 | self.wfile.write(self.Page) 22 | 23 | 24 | if __name__ == '__main__': 25 | serverAddress = ('', 8888) 26 | server = BaseHTTPServer.HTTPServer(serverAddress, RequestHandler) 27 | server.serve_forever() 28 | -------------------------------------------------------------------------------- /python_utils/http_basic/socket_basic/SocketServer_basic.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | from SocketServer import TCPServer, StreamRequestHandler 3 | 4 | RESPONSE = b"""\ 5 | HTTP/1.1 200 OK 6 | Content-type: text/html 7 | Content-length: 15 8 | 9 |

Hello!

""".replace(b"\n", b"\r\n") 10 | 11 | 12 | class MyHandler1(StreamRequestHandler): 13 | """process tcp server, and send http response back""" 14 | def handle(self): 15 | addr = self.request.getpeername() 16 | print 'get connection from %s, %s ' % addr 17 | self.wfile.write(RESPONSE) 18 | 19 | 20 | def test_handler1(): 21 | # http server 22 | server = TCPServer(('', 8888), MyHandler1) 23 | server.serve_forever() 24 | 25 | 26 | if __name__ == '__main__': 27 | test_handler1() 28 | pass 29 | -------------------------------------------------------------------------------- /python_utils/http_basic/flask_web/flask_error_handler.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | 3 | 4 | from flask import Flask 5 | from flask import request 6 | from flask import jsonify 7 | 8 | app = Flask(__name__) 9 | 10 | 11 | @app.errorhandler(404) 12 | def not_found(error=None): 13 | msg = {'status': 404, 'message': 'Not Found: ' + request.url} 14 | 15 | resp = jsonify(msg) 16 | resp.status_code = 404 17 | 18 | return resp 19 | 20 | 21 | @app.route('/users/', methods=['GET']) 22 | def api_users(userid): 23 | users = {'1': 'xy1', '2': 'xy2', '3': 'xy3'} 24 | 25 | if userid in users: 26 | return jsonify({userid: users[userid]}) 27 | else: 28 | return not_found() 29 | 30 | if __name__ == '__main__': 31 | app.run(host='0.0.0.0') 32 | 33 | pass 34 | -------------------------------------------------------------------------------- /python_utils/distributed/zookeeper_demo/zk_node_ope.py: -------------------------------------------------------------------------------- 1 | from kazoo.client import KazooClient 2 | 3 | import logging 4 | logging.basicConfig() 5 | 6 | zk = KazooClient(hosts='127.0.0.1:2181') 7 | zk.start() 8 | 9 | # Ensure a path, create if necessary 10 | zk.ensure_path("/test/zk1") 11 | 12 | # Create a node with data 13 | zk.create("/test/zk1/node", b"a test value11") 14 | 15 | # Determine if a node exists 16 | if 
def tuple_shift_left(tup, n):
    """
    shift tuple over by n indices
    :param tup: like (1,2,3,4)
    :param n: 1
    :return: (2, 3, 4, 1)
    """
    if n < 0:
        raise ValueError('n must be a positive integer')
    # empty tuple or zero shift: nothing to rotate
    if not (tup and n):
        return tup
    offset = n % len(tup)  # shifts larger than len wrap around
    return tup[offset:] + tup[:offset]
def print_binary():
    """Print 5 and -5 in binary, via str.format and bin().

    Python 3 fix: the Python-2-only ``print bin(m), bin(n)`` statement
    becomes ``print(bin(m), bin(n))``, which emits the same
    space-separated line under Python 3.
    """
    m, n = 5, -5
    print('{0:b}'.format(m))   # 101
    print('{0:b}'.format(n))   # -101  (sign + magnitude, not two's complement)

    print(bin(m), bin(n))      # 0b101 -0b101
def url_img_cv():
    """Fetch an image over HTTP and display it with OpenCV.

    Syntax fix: ``except Exception, e`` is Python-2-only and a
    SyntaxError under Python 3; ``except Exception as e`` is valid on
    both (since Python 2.6).  Prints use the function form.

    NOTE(review): ``urllib.urlopen`` is the Python 2 API; under Python 3
    this would need ``urllib.request.urlopen`` -- left unchanged here to
    avoid altering the module-level imports outside this block.
    """
    url = "http://yysnapshot.bs2src9.yy.com/636be6fc25410c5208d4c4ba5a22e2365768ec52?height=960&interval=12465&file=636be6fc25410c5208d4c4ba5a22e2365768ec52&width=544&bucket=yysnapshot&yid=7399736121338363914&day=20170817"
    try:
        url_response = urllib.urlopen(url)
        # decode the raw HTTP body into an OpenCV image;
        # flag -1 keeps the source channels/depth as-is
        img_array = np.array(bytearray(url_response.read()), dtype=np.uint8)
        img = cv2.imdecode(img_array, -1)
        cv2.imshow('URL Image', img)
        cv2.waitKey()
    except Exception as e:
        print(e)
    finally:
        print('no use line, nothing to be cleared')
        # can't return None in this scope, because this file is certainly to be executed
        # return None
return self.nickname + " " + self.password 25 | 26 | 27 | def query_orm(): 28 | session = get_db_session() 29 | user = session.query(User).first() 30 | print user.name_pwd 31 | 32 | if __name__ == '__main__': 33 | query_orm() 34 | -------------------------------------------------------------------------------- /python_utils/http_basic/flask_web/flask_basic_web.py: -------------------------------------------------------------------------------- 1 | # _*_coding:utf-8 _*_ 2 | 3 | from flask import Flask 4 | from flask import abort 5 | from flask import redirect 6 | from flask import request 7 | from flask import Response 8 | 9 | 10 | app = Flask(__name__) 11 | 12 | 13 | @app.route('/') 14 | def index(): 15 | return '

from win10 slow machine' 16 | 17 | 18 | @app.route('/user/') 19 | def say_hello(name): 20 | return '

hello, %s

' % name 21 | 22 | 23 | @app.route('/paras/') 24 | def multi_paras(): 25 | ret_str = '' 26 | for para in request.args: 27 | print para, request.args[para] 28 | ret_str += para 29 | 30 | return '

multi_paras, %s

def get_data():
    """Generate a reproducible noisy linear dataset: y = 2x - 5 + noise.

    :return: (x, y) -- two float arrays of 50 samples each
    """
    rng = np.random.RandomState(1)  # fixed seed -> deterministic samples
    x = 10 * rng.rand(50)
    y = 2 * x - 5 + rng.randn(50)
    # plt.scatter(x, y)
    # plt.show()
    return x, y


def lr_fit():
    """Fit sklearn LinearRegression on the synthetic data, print the
    learned slope/intercept, and plot the fitted line.

    Python 3 fix: print statements replaced with the print() function.
    """
    x, y = get_data()
    model = LinearRegression(fit_intercept=True)
    model.fit(x[:, np.newaxis], y)  # sklearn expects a 2-D feature matrix
    xfit = np.linspace(0, 10, 1000)
    yfit = model.predict(xfit[:, np.newaxis])

    print("Model slope: ", model.coef_[0])
    print("Model intercept:", model.intercept_)

    plt.scatter(x, y)
    plt.plot(xfit, yfit)
    plt.show()
def draw_result(lst_iter, lst_loss, lst_acc, title):
    """Plot loss and accuracy curves against iteration count and save
    the figure as "<title>.png".

    :param lst_iter: x-axis values (iteration numbers)
    :param lst_loss: loss value per iteration (blue line)
    :param lst_acc: accuracy value per iteration (red line)
    :param title: figure title, also used as the output file name
    """
    plt.plot(lst_iter, lst_loss, '-b', label='loss')
    plt.plot(lst_iter, lst_acc, '-r', label='accuracy')

    plt.xlabel("n iteration")
    plt.legend(loc='upper left')
    plt.title(title)
    plt.savefig(title + ".png")  # must precede show(): show() clears the figure

    plt.show()


def test_draw():
    """Drive draw_result with a synthetic quadratic loss/accuracy pair.

    Python 3 fix: ``xrange()`` does not exist in Python 3; ``range()``
    produces identical values inside the comprehensions.
    """
    lst_iter = range(100)
    lst_loss = [0.01 * i - 0.01 * i ** 2 for i in range(100)]
    lst_acc = [0.01 * i + 0.01 * i ** 2 for i in range(100)]
    draw_result(lst_iter, lst_loss, lst_acc, "sgd_method")
int") 20 | sys.exit() 21 | else: 22 | msg = sys.argv[4] 23 | for arg in sys.argv[5:]: 24 | msg += " " 25 | msg += arg 26 | kv["id"] = id 27 | kv["alarm"] = 1 28 | kv["msg"] = msg.replace("'", "") 29 | 30 | if __name__ == '__main__': 31 | # config script para 32 | print sys.argv 33 | # ['E:/git_code/python_utils/py_basic/argv_basic.py', '1', '2', '3'] 34 | print len(sys.argv) 35 | # 4 36 | # parse_js() 37 | -------------------------------------------------------------------------------- /python_utils/py_basic/decorator_basic.py: -------------------------------------------------------------------------------- 1 | # _*_ coding:utf-8 _*_ 2 | 3 | """ 4 | python @ usage example 5 | """ 6 | 7 | 8 | def hello(fn): 9 | def wrapper(): 10 | print "hello, %s" % fn.__name__ 11 | fn() 12 | print 'bye, %s' % fn.__name__ 13 | return wrapper 14 | 15 | 16 | def do_nothing(fn): 17 | def wrapper(): 18 | print 'do not exe fn' 19 | return wrapper 20 | 21 | 22 | @hello 23 | def foo(): 24 | print 'I am foo' 25 | # hello, foo 26 | # I am foo 27 | # bye, foo 28 | 29 | 30 | @do_nothing 31 | def foo_nothing(): 32 | print 'I am foo_nothing' 33 | # do not exe fn 34 | 35 | 36 | @do_nothing 37 | @hello 38 | def foo_nested(): 39 | print 'I am foo_nested' 40 | # do not exe fn 41 | 42 | 43 | @hello 44 | @do_nothing 45 | def foo_nested_v2(): 46 | print 'I am foo_nested_v2' 47 | 48 | 49 | if __name__ == '__main__': 50 | # foo() 51 | # foo_nothing() 52 | # foo_nested() 53 | foo_nested_v2() 54 | pass 55 | -------------------------------------------------------------------------------- /python_utils/numpy_operate/broadcast_demo.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | 3 | import numpy as np 4 | 5 | 6 | def broadcast_demo(): 7 | arr = np.ones((3, 4)) 8 | print arr 9 | print arr + 1 10 | 11 | b = np.broadcast(arr, 1) 12 | print b.shape 13 | # (3L, 4L) 14 | 15 | 16 | def bc_demo_2(): 17 | a = np.array([1.0, 2.0, 3]) 18 | b = np.ones(3) * 2 
19 | print b 20 | # [ 2. 2. 2.] 21 | print a * b 22 | # [ 2. 4. 6.] 23 | 24 | b = 2 25 | print a * b 26 | # [ 2. 4. 6.] 27 | 28 | x = np.arange(4).reshape(4, 1) 29 | print x 30 | # [[0] 31 | # [1] 32 | # [2] 33 | # [3]] 34 | y = np.ones(5) 35 | print y 36 | # [ 1. 1. 1. 1. 1.] 37 | z = x + y 38 | print z 39 | # [[ 1. 1. 1. 1. 1.] 40 | # [ 2. 2. 2. 2. 2.] 41 | # [ 3. 3. 3. 3. 3.] 42 | # [ 4. 4. 4. 4. 4.]] 43 | print z.shape 44 | # (4L, 5L) 45 | 46 | if __name__ == '__main__': 47 | # broadcast_demo() 48 | bc_demo_2() 49 | pass 50 | -------------------------------------------------------------------------------- /python_utils/machine_learn/decision_tree/tree.dot: -------------------------------------------------------------------------------- 1 | digraph Tree { 2 | node [shape=box] ; 3 | 0 [label="loan level <= 0.5\ngini = 0.48\nsamples = 15\nvalue = [6, 9]"] ; 4 | 1 [label="has work <= 0.5\ngini = 0.32\nsamples = 5\nvalue = [4, 1]"] ; 5 | 0 -> 1 [labeldistance=2.5, labelangle=45, headlabel="True"] ; 6 | 2 [label="gini = 0.0\nsamples = 4\nvalue = [4, 0]"] ; 7 | 1 -> 2 ; 8 | 3 [label="gini = 0.0\nsamples = 1\nvalue = [0, 1]"] ; 9 | 1 -> 3 ; 10 | 4 [label="own house <= 0.5\ngini = 0.32\nsamples = 10\nvalue = [2, 8]"] ; 11 | 0 -> 4 [labeldistance=2.5, labelangle=-45, headlabel="False"] ; 12 | 5 [label="age <= 1.5\ngini = 0.4444\nsamples = 6\nvalue = [2, 4]"] ; 13 | 4 -> 5 ; 14 | 6 [label="has work <= 0.5\ngini = 0.4444\nsamples = 3\nvalue = [2, 1]"] ; 15 | 5 -> 6 ; 16 | 7 [label="gini = 0.0\nsamples = 2\nvalue = [2, 0]"] ; 17 | 6 -> 7 ; 18 | 8 [label="gini = 0.0\nsamples = 1\nvalue = [0, 1]"] ; 19 | 6 -> 8 ; 20 | 9 [label="gini = 0.0\nsamples = 3\nvalue = [0, 3]"] ; 21 | 5 -> 9 ; 22 | 10 [label="gini = 0.0\nsamples = 4\nvalue = [0, 4]"] ; 23 | 4 -> 10 ; 24 | } -------------------------------------------------------------------------------- /python_utils/DbService/sqlalchemy/basic.py: -------------------------------------------------------------------------------- 1 | # 
def get_db_session():
    """Build a SQLAlchemy session from the JSON mysql config file.

    Python 3 fix: print statements replaced with the print() function.

    NOTE(review): the config path is relative, so this only works when
    run from this file's directory -- confirm against callers.

    :return: a new Session bound to a mysql+mysqldb engine
    """
    db_config_file = '../config/mysql_config.json'
    db_js_data = get_json_from_file(db_config_file)
    # config JSON must supply the user/pwd/host/db keys used below
    db_connect = 'mysql+mysqldb://{user}:{pwd}@{host}/{db}?charset=utf8'.format(**db_js_data)
    print(db_connect)
    # mysql+mysqldb://root:123@localhost/springdemo?charset=utf8
    engine = create_engine(db_connect, echo=True)
    session = sessionmaker(bind=engine)
    return session()


def query_example():
    """Run two raw SQL probes through the session (requires a live MySQL)."""
    session = get_db_session()
    print(session.execute('show databases').fetchall())
    # [(u'springdemo',), (u'test',), (u'world',)]
    print(session.execute('select * from tb_yylive_news where id = 1').first())
    # (1L, u'http://www.bbc.com')
32 | plt.scatter(centers[:, 0], centers[:, 1], c='black', s=200, alpha=0.5) 33 | plt.show() 34 | 35 | if __name__ == '__main__': 36 | # get_data() 37 | predict() 38 | pass 39 | -------------------------------------------------------------------------------- /python_utils/http_basic/http_realize/http_svr_basic_1.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | 3 | import socket 4 | import BaseHTTPServer 5 | import time 6 | 7 | 8 | HOST = "127.0.0.1" 9 | PORT = 8888 10 | 11 | RESPONSE = b"""\ 12 | HTTP/1.1 200 OK 13 | Content-type: text/html 14 | Content-length: 15 15 | 16 |

Hello!

""".replace(b"\n", b"\r\n") 17 | 18 | 19 | RESPONSE = 'a' * (1024 * 1) 20 | 21 | 22 | def test_simple(): 23 | server_sock = socket.socket() 24 | server_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) 25 | server_sock.bind((HOST, PORT)) 26 | server_sock.listen(0) 27 | print "Listening on %s:%s..." % (HOST, PORT) 28 | 29 | while 1: 30 | client_sock, client_addr = server_sock.accept() 31 | print "New connection from %s:%s." % (client_addr) 32 | # client_sock.sendall(RESPONSE) 33 | time.sleep(12) 34 | 35 | data = client_sock.recv(1024) 36 | print "recv :%s" % data 37 | 38 | n = client_sock.send(RESPONSE) 39 | print "just send %s bytes" % n 40 | 41 | 42 | if __name__ == '__main__': 43 | test_simple() 44 | pass 45 | -------------------------------------------------------------------------------- /python_utils/sk_sc_pd_operator/sk_metric_accuracy.py: -------------------------------------------------------------------------------- 1 | # _*_ coding:utf-8 _*_ 2 | 3 | import numpy as np 4 | from sklearn.metrics import accuracy_score, confusion_matrix 5 | from sklearn.metrics import classification_report 6 | 7 | 8 | def accuracy_score_demo(): 9 | y_true = [0, 1, 2, 3, 2, 6] 10 | y_pred = [0, 2, 1, 3, 4, 7] 11 | print accuracy_score(y_true, y_pred) 12 | # 0.5 13 | print confusion_matrix(y_true, y_pred) 14 | 15 | y_true = [0, 1, 0, 1, 1, 0, 1, 0, 1] 16 | y_pred = [0, 0, 1, 0, 0, 0, 1, 1, 0] 17 | print confusion_matrix(y_true, y_pred) 18 | # [[2 2] 四个 0 两个被识别为了 0, 两个被识别为 1 19 | # [4 1]] 五个 1 四个被识别为了 0, 一个被识别为 1 20 | 21 | 22 | def classification_report_demo(): 23 | y_pred = [0, 0, 2, 1, 0] 24 | y_true = [0, 1, 2, 2, 0] 25 | target_names = ['class 0', 'class 1', 'class 2'] 26 | print classification_report(y_true, y_pred, target_names=target_names) 27 | 28 | print confusion_matrix(y_true, y_pred) 29 | # [[2 0 0] 30 | # [1 0 0] 31 | # [0 1 1]] 32 | 33 | if __name__ == '__main__': 34 | classification_report_demo() 35 | # accuracy_score_demo() 36 | pass 37 | 
def test_timer_with_para():
    """Timer test *with* a parameter.

    (The original docstring was a copy-paste of the no-parameter case.)
    Schedules ``hello_2(name)`` to run once after 5 seconds on a timer
    thread, then the thread exits.

    :return: the started ``threading.Timer`` so the caller can
        ``cancel()`` or ``join()`` it (previously nothing was returned).
    """
    name = 'bear fish'
    t = threading.Timer(5, hello_2, [name])
    # the callback runs exactly once, 5 seconds after start()
    t.start()
    return t
def fib(n):
    """Return the n-th Fibonacci number.

    Deliberately naive double recursion: this function exists to burn
    CPU for the GIL/threading benchmark, so it must stay slow.
    """
    return 1 if n <= 2 else fib(n - 1) + fib(n - 2)
def one_dim_arr_multiply():
    """Demo of ``*`` (elementwise) vs ``np.dot`` (inner product) on 1-D arrays.

    Fixed for Python 3: the original used Python-2-only ``print``
    statements.  Also returns the inner product so the demo is testable.

    :return: the inner product of the two fixed demo vectors (11)
    """
    arr1 = np.array([1, 2])
    arr2 = np.array([3, 4])
    print(arr1 * arr2)  # elementwise -> [3 8]
    # for 1-D arrays np.dot is the vector inner product;
    # .transpose() is a no-op on a 1-D array, so both calls agree
    print(np.dot(arr1, arr2.transpose()))  # 11
    print(arr1, arr2.transpose())
    inner = np.dot(arr1, arr2)  # 11
    print(inner)
    return inner
def mul_dim_arr_multiply():
    """Demo of ``*`` vs ``np.dot`` on 2-D (column-vector) arrays.

    Bug fix: the original ended with ``np.dot(arr1, arr2)`` on two
    (2, 1) arrays, which always raises ValueError and crashed the demo
    (its own trailing comment documented the crash).  The error is now
    caught and reported so the function runs to completion.  Prints are
    Python-3 compatible.
    """
    arr1 = np.array([[1], [2]])
    arr2 = np.array([[3], [4]])
    print(arr1 * arr2)
    # elementwise:
    # [[3]
    #  [8]]
    print(np.dot(arr1, arr2.transpose()))
    # (2,1) x (1,2) matrix product:
    # [[3 4]
    #  [6 8]]
    try:
        print(np.dot(arr1, arr2))
    except ValueError as err:
        # shapes (2,1) and (2,1) are not aligned: 1 (dim 1) != 2 (dim 0)
        print(err)

Hello!

def euclidean_distance():
    """Compute the Euclidean distance between two fixed demo vectors.

    Bug fix: the original printed ``np.square(a1, a2)``, where the
    second positional argument of ``np.square`` is the *out* buffer —
    that call clobbered ``a2`` with ``a1**2`` and never computed a
    distance.  Now computes sqrt(sum((a1 - a2)**2)).

    :return: the Euclidean distance (~3.4641 for the demo vectors)
    """
    a1 = np.array([1, 2, 3])
    a2 = np.array([3, 4, 5])
    dist = np.sqrt(np.sum(np.square(a1 - a2)))
    print(dist)  # 3.46410161514
    return dist
def eu_distance(a1=(1, 2, 3), a2=(3, 4, 5)):
    """Pure-Python Euclidean distance between two equal-length sequences.

    Generalized: the two vectors are now parameters (defaults keep the
    original demo values, so existing no-argument calls behave the
    same).  Print is Python-3 compatible and the distance is returned.

    :param a1: first vector (any sequence of numbers)
    :param a2: second vector, same length as ``a1``
    :return: the Euclidean distance
    """
    from math import sqrt
    dist = sqrt(sum((a - b) ** 2 for a, b in zip(a1, a2)))
    print(dist)  # 3.46410161514 for the default vectors
    return dist
def fibonacci(n):
    """Generate the first *n* Fibonacci numbers: 1, 1, 2, 3, 5, ..."""
    prev, cur = 0, 1
    for _ in range(n):
        yield cur
        prev, cur = cur, prev + cur
def int_ip2str(int_ip=3232235876):
    """Convert a 32-bit integer IP address to dotted-quad notation.

    Fixed for Python 3: ``xrange`` no longer exists.  The O(n^2)
    ``insert(0, ...)`` loop is replaced by a comprehension that walks
    the octets from the most significant byte down.

    :param int_ip: IPv4 address as an unsigned 32-bit integer
    :return: dotted-quad string, e.g. ``'192.168.1.100'``
    """
    octets = [str((int_ip >> shift) & 0xff) for shift in (24, 16, 8, 0)]
    return ".".join(octets)
29 | 30 | 31 | 32 |
def generate_random_num_str(length):
    """Return a random string of ASCII letters of the given length.

    Bug fix: ``string.letters`` existed only in Python 2 (and was
    locale-dependent); it is gone in Python 3.  ``getattr`` keeps the
    old behaviour on Python 2 and falls back to the portable
    ``string.ascii_letters`` elsewhere.

    :param length: number of random letters to produce
    :return: random letter string (empty string for length 0)
    """
    letters = getattr(string, 'letters', string.ascii_letters)
    return ''.join(random.choice(letters) for _ in range(length))
def locked_method(method):
    """Method decorator. Requires a lock object at self._lock."""
    def newmethod(self, *args, **kwargs):
        with self._lock:
            return method(self, *args, **kwargs)
    return newmethod


class DecoratorLockedSet(set):
    """A set whose add()/remove() are serialized via a decorator-held lock."""

    def __init__(self, *args, **kwargs):
        self._lock = Lock()
        super(DecoratorLockedSet, self).__init__(*args, **kwargs)

    @locked_method
    def add(self, *args, **kwargs):
        # BUG FIX: forward the call arguments.  The original called
        # super().add(args, kwargs), which added the (args, kwargs)
        # tuple itself to the set instead of the element.
        return super(DecoratorLockedSet, self).add(*args, **kwargs)

    @locked_method
    def remove(self, *args, **kwargs):
        # BUG FIX: same argument-forwarding defect as add().
        return super(DecoratorLockedSet, self).remove(*args, **kwargs)


class LockedSet(set):
    """A set where add(), remove(), and 'in' operator are thread-safe"""

    def __init__(self, *args, **kwargs):
        self._lock = Lock()
        super(LockedSet, self).__init__(*args, **kwargs)

    def add(self, elem):
        with self._lock:
            super(LockedSet, self).add(elem)

    def remove(self, elem):
        with self._lock:
            super(LockedSet, self).remove(elem)

    def __contains__(self, elem):
        with self._lock:
            # BUG FIX: the result must be returned; without the return
            # every membership test evaluated to None (always falsy).
            return super(LockedSet, self).__contains__(elem)
def arr_close():
    """Demonstrate np.isclose / np.allclose tolerance behaviour.

    Fixed for Python 3 (the original used ``print`` statements) and the
    two ``allclose`` results are returned so the demo is testable.

    :return: tuple ``(allclose_result_1, allclose_result_2)`` ->
        ``(False, True)``
    """
    ar1 = np.array([[1, 2], [3, 4]])
    ar2 = np.array([[1.1, 2.1], [3.1, 4.1]])
    ar3 = np.array([[1.00001, 2.00001], [3.00001, 4.00001]])
    ar4 = np.array([[1.0001, 2.0001], [3.0001, 4.0001]])

    print(np.isclose(ar1, ar2))
    # all False (0.1 exceeds the default rtol=1e-5, atol=1e-8)
    print(np.isclose(ar1, ar3))
    # all True (1e-5 is within the default tolerances)
    print(np.isclose(ar1, ar4))
    # all False
    print(np.isclose(ar1, ar4, atol=1.e-4))
    # all True once the absolute tolerance is widened

    close_1 = bool(np.allclose([1e10, 1e-7], [1.00001e10, 1e-8]))  # False
    close_2 = bool(np.allclose([1e10, 1e-8], [1.00001e10, 1e-9]))  # True
    print(close_1)
    print(close_2)
    return close_1, close_2
26 | x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5 27 | y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5 28 | xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h)) 29 | Z = logreg.predict(np.c_[xx.ravel(), yy.ravel()]) 30 | 31 | # Put the result into a color plot 32 | Z = Z.reshape(xx.shape) 33 | plt.figure(1, figsize=(4, 3)) 34 | plt.pcolormesh(xx, yy, Z, cmap=plt.cm.Paired) 35 | 36 | # Plot also the training points 37 | plt.scatter(X[:, 0], X[:, 1], c=Y, edgecolors='k', cmap=plt.cm.Paired) 38 | plt.xlabel('Sepal length') 39 | plt.ylabel('Sepal width') 40 | 41 | plt.xlim(xx.min(), xx.max()) 42 | plt.ylim(yy.min(), yy.max()) 43 | plt.xticks(()) 44 | plt.yticks(()) 45 | 46 | plt.show() 47 | -------------------------------------------------------------------------------- /python_utils/py_basic/dw_img_from_google.py: -------------------------------------------------------------------------------- 1 | from google_images_download import google_images_download 2 | 3 | chrome_driver_path = "C:/Program Files (x86)/Google/Chrome/Application/chromedriver.exe" 4 | out_put_path = "E:/face_rec/yy_face_demand/cartoon_sample/" 5 | out_put_path = "E:/face_rec/short_vedio_famous_people/people_lst/" 6 | out_put_path = "E:/people_detection/test_datasets/" 7 | 8 | 9 | def dw(s_keyword): 10 | """ 11 | :param s_keyword: like "pet cat images, pet dog images" 12 | :return: None 13 | """ 14 | # class instantiation 15 | response = google_images_download.googleimagesdownload() 16 | 17 | # creating list of arguments 18 | arguments = {"keywords": s_keyword, 19 | "limit": 200, "print_urls": True, 20 | "output_directory": out_put_path, 21 | "chromedriver": chrome_driver_path} 22 | 23 | # passing the arguments to the function 24 | paths = response.download(arguments) 25 | 26 | # printing absolute paths of the downloaded images 27 | print(paths) 28 | 29 | 30 | def do_dw(): 31 | lst_keywords = ["pedestrian images" 32 | ] 33 | dw(lst_keywords[0]) 34 | pass 35 | 36 
def basic_non_block():
    """Connect with a non-blocking socket and poll until connected.

    Bug fixes:
    - the host string contained a URL scheme
      (``'http://vis-www.cs.umass.edu'``), which is not a hostname and
      made every connect attempt fail with a DNS error — ``connect``
      wants a bare hostname;
    - the inner bare ``except:`` now catches only ``socket.error``;
    - prints are Python-3 compatible.

    Note: a non-blocking connect normally raises immediately
    (EINPROGRESS/EWOULDBLOCK), after which the loop busy-polls
    ``getpeername`` until the handshake completes — demo code only.
    """
    s = socket.socket()
    s.setblocking(0)

    try:
        s.connect(('vis-www.cs.umass.edu', 80))
    except socket.error as e:
        print(str(e))
        i = 0
        while True:
            try:
                print("We are connected to %s:%d" % s.getpeername())
                break
            except socket.error:
                print("Let's do some math while waiting: %d" % i)
                i += 1
    else:
        print("We are connected to %s:%d" % s.getpeername())
def rename_columns():
    """Demonstrate three ways to rename DataFrame columns.

    Fixed for Python 3 (``print`` statements) and the final DataFrame
    is returned so callers/tests can inspect the result.

    :return: the demo DataFrame after all renames (columns a, b, C)
    """
    df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]})
    print(df)
    #    A  B  C
    # 0  1  4  7
    # 1  2  5  8
    # 2  3  6  9

    # 1) rename a single column in place via a mapping
    df.rename(columns={"B": "b"}, inplace=True)
    print(df)
    #    A  b  C

    # 2) wholesale replacement of the columns Index
    df.columns = list('abc')
    print(df)
    #    a  b  c

    # 3) mutate the Index's underlying values array directly
    #    (a hack — bypasses the Index immutability contract)
    df.columns.values[2] = 'C'
    print(df)
    #    a  b  C
    return df
example arr_prob like [0.5, 0.5] 13 | return -1 * 0.5 *log0.5 + -1 * 0.5 * log0.5 = 1 14 | :param arr_prob: one dimension probability array 15 | :return: entropy 16 | """ 17 | # -1 * sum(Pi * logPi) 18 | return np.sum(-1 * np.log2(arr_prob) * arr_prob) 19 | 20 | def _cal_conditional_entropy(self, X, Y): 21 | """ 22 | calculate conditional entropy H(D|A) 23 | :return: 24 | """ 25 | pass 26 | 27 | def _cal_class_entropy(self, y): 28 | """ 29 | calculate data set entropy 30 | 31 | :param y: 32 | :return: 33 | """ 34 | num = len(y) 35 | print num # 15 36 | unique_class, counter = np.unique(y, return_counts=True) 37 | print unique_class, counter 38 | # [0 1] [6 9] 39 | # calculate each class probability 40 | class_prob = [c * 1.0 / num for c in counter] 41 | print class_prob 42 | # [0.40000000000000002, 0.59999999999999998] 43 | print self._cal_entropy(class_prob) 44 | # 0.970950594455 45 | 46 | 47 | def test_cal_class_entropy(): 48 | from create_data import get_loan_data_lh 49 | X, Y = get_loan_data_lh() 50 | dt = DTree() 51 | dt._cal_class_entropy(Y) 52 | 53 | if __name__ == '__main__': 54 | test_cal_class_entropy() 55 | # dt = DTree() 56 | 57 | pass 58 | -------------------------------------------------------------------------------- /python_utils/machine_learn/knearest/knn_scratch.py: -------------------------------------------------------------------------------- 1 | # _*_ coding:utf-8 _*_ 2 | """ 3 | implement knn from scratch 4 | """ 5 | from collections import Counter 6 | import numpy as np 7 | 8 | 9 | class KnnScratch(object): 10 | 11 | def fit(self, x_train, y_train): 12 | self.x_train = x_train 13 | self.y_train = y_train 14 | 15 | def predict_once(self, x_test, k): 16 | lst_distance = [] 17 | lst_predict = [] 18 | 19 | for i in xrange(len(self.x_train)): 20 | # euclidean distance 21 | distance = np.linalg.norm(x_test - self.x_train[i, :]) 22 | # distance = np.sqrt(np.sum(np.square(x_test, x_train[i, :]))) 23 | lst_distance.append([distance, i]) 24 | 25 | 
def get_pool(b_dummy=True, num=4):
    """
    Build a worker pool of the requested flavour.

    :param b_dummy: True -> thread pool (multiprocessing.dummy),
                    False -> real process pool
    :param num: number of workers in the pool
    :return: pool object
    """
    pool_cls = ThreadPool if b_dummy else ProcessPool
    return pool_cls(num)
def tb_partition_sql(d1=datetime.date(2018, 12, 1), d2=datetime.date(2018, 12, 31)):
    """
    Generate MySQL daily-partition clauses for the closed date range [d1, d2].

    Each day gets one partition named pYYYYMMDD whose upper bound is the
    *next* day, e.g. for the default range the first line is
    PARTITION p20181201 VALUES LESS THAN (TO_DAYS('2018-12-02')) ENGINE = InnoDB,
    and the last is
    PARTITION p20181231 VALUES LESS THAN (TO_DAYS('2019-01-01')) ENGINE = InnoDB,

    :param d1: first day to partition (inclusive)
    :param d2: last day to partition (inclusive)
    :return: list of clause strings (also printed, one per line)
    """
    sql = """PARTITION p%s VALUES LESS THAN (TO_DAYS('%s')) ENGINE = InnoDB,"""
    # one extra day past d2 so the last partition still has an upper bound
    days = [d1 + datetime.timedelta(days=x) for x in range((d2 - d1).days + 2)]

    lines = []
    for cur_day, next_day in zip(days, days[1:]):
        # partition is named after its own day; the bound is the next day
        lines.append(sql % (cur_day.strftime('%Y%m%d'),
                            next_day.strftime('%Y-%m-%d')))

    for line in lines:
        print(line)
    return lines
class DbSubService(DbBase):
    """
    Concrete MySQL access helper built on DbBase.

    Relies on ``self.cursor`` and ``self.conn`` — presumably created by
    DbBase.__init__ from the db_config_file kwargs; verify against DbBase.
    """

    def __init__(self, **kwargs):
        # forward connection settings (e.g. db_config_file=...) to DbBase
        super(DbSubService, self).__init__(**kwargs)

    def query(self):
        # placeholder — not implemented yet
        pass

    def count(self, tb):
        """
        Count all rows of a table.

        NOTE(review): the table name is interpolated directly into the SQL
        string; `tb` must come from trusted code, never from user input
        (table names cannot be bound as query parameters).
        :param tb: table name
        :return: table rows count
        """
        query_sql = ' select count(*) from %s ' % tb
        self.cursor.execute(query_sql)
        res = self.cursor.fetchone()
        # debug output of the count value
        print res[0]
        return res[0]

    def get_liver_info(self, limit_start, limit_size):
        """
        Fetch one page of per-reporter audit-status counts.

        Groups tb_ms_mobile_report_test by reported_uid and pivots the
        audit_status values S01..S05 into columns via sum(condition).
        :param limit_start: LIMIT offset of the page
        :param limit_size: LIMIT row count of the page
        :return: list of result rows from the cursor
        """
        query = """select reported_uid,
        sum(audit_status='S01') as audit_status_S01,
        sum(audit_status='S02') as audit_status_S02,
        sum(audit_status='S03') as audit_status_S03,
        sum(audit_status='S04') as audit_status_S04,
        sum(audit_status='S05') as audit_status_S05,
        count(*) as audit_status_all from iboms.tb_ms_mobile_report_test
        group by reported_uid
        limit %s, %s""" % (limit_start, limit_size)

        self.cursor.execute(query)
        return [row for row in self.cursor]

    def bulk_update(self, lst):
        """
        batch updates
        [("new_value" , "3"),("new_value" , "6")]
        Each tuple is (Name, Id); executemany binds them into the UPDATE,
        then the transaction is committed once for the whole batch.
        :param lst: list of (name, id) parameter tuples
        :return:
        """
        query = """UPDATE Writers SET Name = %s WHERE Id = %s"""
        self.cursor.executemany(query, lst)
        self.conn.commit()
def arr_sum():
    """
    Demo of numpy axis-wise reductions, boolean masks and reversed views.

    Fix: the original used Python-2-only print statements; every print here
    takes a single argument, so the function form behaves identically on
    Python 2 and Python 3.
    """
    arr = np.arange(6).reshape((2, 3))
    print(arr)
    # [[0 1 2]
    #  [3 4 5]]
    print(arr.sum(axis=0))    # column sums (axis=0 collapses rows)
    # [3 5 7]
    print(arr.sum(axis=1))    # row sums
    # [ 3 12]

    print(arr > 1)            # element-wise boolean mask
    # [[False False  True]
    #  [ True  True  True]]
    print(arr[::-1])          # rows in reverse order (a view, no copy)
    # [[3 4 5]
    #  [0 1 2]]
-------------------------------------------------------------------------------- 1 | # _*_ coding:utf-8 _*_ 2 | """ 3 | python operator module usage, cited from 4 | https://docs.python.org/2/library/operator.html 5 | """ 6 | 7 | import operator 8 | 9 | 10 | def cmp_fun(): 11 | a, b = 5, 3 12 | print operator.le(a, b) 13 | # False 14 | print operator.gt(a, b) 15 | # True 16 | 17 | 18 | def lst_ope(): 19 | lst = [1, 2, 3] 20 | print operator.indexOf(lst, 2) 21 | # 1 22 | lst1 = [1, 2, 3, 2] 23 | print operator.countOf(lst1, 2) 24 | # 2 25 | 26 | 27 | def cal_ope(): 28 | lst1 = [0, 1, 2, 3] 29 | lst2 = [10, 20, 30, 40] 30 | print map(operator.mul, lst1, lst2) 31 | # [0, 20, 60, 120] 32 | 33 | print sum(map(operator.mul, lst1, lst2)) 34 | # 200 35 | 36 | a, b = 1, 3 37 | print operator.iadd(a, b) 38 | # 4 39 | 40 | 41 | def item_ope(): 42 | s = ['h', 'e', 'l', 'l', 'o'] 43 | print operator.getitem(s, 1) 44 | # e 45 | print operator.itemgetter(1, 4)(s) 46 | # ('e', 'o') 47 | 48 | inventory = [('apple', 3), ('banana', 2), ('pear', 5), ('orange', 1)] 49 | get_count = operator.itemgetter(1) 50 | print map(get_count, inventory) 51 | # [3, 2, 5, 1] 52 | 53 | print sorted(inventory, key=get_count) 54 | # [('orange', 1), ('banana', 2), ('apple', 3), ('pear', 5)] 55 | 56 | 57 | def reduce_ope(): 58 | a = [2, 3, 4, 5] 59 | print reduce(lambda x, y: x + y, a) 60 | # 14 61 | print reduce(operator.add, a) 62 | # 14 63 | 64 | lst = [3, 2, 3] 65 | print reduce(operator.xor, lst) 66 | # 2 67 | 68 | # use reduce with init value, sum from init 69 | lst = [1, 2, 3] 70 | print reduce(operator.add, lst, 10) 71 | # 16 72 | 73 | 74 | if __name__ == '__main__': 75 | reduce_ope() 76 | # item_ope() 77 | # cal_ope() 78 | # lst_ope() 79 | # cmp_fun() 80 | pass 81 | -------------------------------------------------------------------------------- /python_utils/machine_learn/dataset/cluster/cluster_txt: -------------------------------------------------------------------------------- 1 | 1.658985 
4.285136 2 | -3.453687 3.424321 3 | 4.838138 -1.151539 4 | -5.379713 -3.362104 5 | 0.972564 2.924086 6 | -3.567919 1.531611 7 | 0.450614 -3.302219 8 | -3.487105 -1.724432 9 | 2.668759 1.594842 10 | -3.156485 3.191137 11 | 3.165506 -3.999838 12 | -2.786837 -3.099354 13 | 4.208187 2.984927 14 | -2.123337 2.943366 15 | 0.704199 -0.479481 16 | -0.392370 -3.963704 17 | 2.831667 1.574018 18 | -0.790153 3.343144 19 | 2.943496 -3.357075 20 | -3.195883 -2.283926 21 | 2.336445 2.875106 22 | -1.786345 2.554248 23 | 2.190101 -1.906020 24 | -3.403367 -2.778288 25 | 1.778124 3.880832 26 | -1.688346 2.230267 27 | 2.592976 -2.054368 28 | -4.007257 -3.207066 29 | 2.257734 3.387564 30 | -2.679011 0.785119 31 | 0.939512 -4.023563 32 | -3.674424 -2.261084 33 | 2.046259 2.735279 34 | -3.189470 1.780269 35 | 4.372646 -0.822248 36 | -2.579316 -3.497576 37 | 1.889034 5.190400 38 | -0.798747 2.185588 39 | 2.836520 -2.658556 40 | -3.837877 -3.253815 41 | 2.096701 3.886007 42 | -2.709034 2.923887 43 | 3.367037 -3.184789 44 | -2.121479 -4.232586 45 | 2.329546 3.179764 46 | -3.284816 3.273099 47 | 3.091414 -3.815232 48 | -3.762093 -2.432191 49 | 3.542056 2.778832 50 | -1.736822 4.241041 51 | 2.127073 -2.983680 52 | -4.323818 -3.938116 53 | 3.792121 5.135768 54 | -4.786473 3.358547 55 | 2.624081 -3.260715 56 | -4.009299 -2.978115 57 | 2.493525 1.963710 58 | -2.513661 2.642162 59 | 1.864375 -3.176309 60 | -3.171184 -3.572452 61 | 2.894220 2.489128 62 | -2.562539 2.884438 63 | 3.491078 -3.947487 64 | -2.565729 -2.012114 65 | 3.332948 3.983102 66 | -1.616805 3.573188 67 | 2.280615 -2.559444 68 | -2.651229 -3.103198 69 | 2.321395 3.154987 70 | -1.685703 2.939697 71 | 3.031012 -3.620252 72 | -4.599622 -2.185829 73 | 4.196223 1.126677 74 | -2.133863 3.093686 75 | 4.668892 -2.562705 76 | -2.793241 -2.149706 77 | 2.884105 3.043438 78 | -2.967647 2.848696 79 | 4.479332 -1.764772 80 | -4.905566 -2.911070 -------------------------------------------------------------------------------- 
def mean():
    """
    Demo of numpy mean along different axes.

    Fix: ported Python-2-only print statements to the (single-argument,
    version-agnostic) function form, and the per-column mean is now
    returned so callers/tests can use it instead of only reading stdout.

    :return: 1-D array of column means (axis=0) of the demo matrix
    """
    arr = np.arange(12).reshape((3, 4))
    print(arr)
    # [[ 0  1  2  3]
    #  [ 4  5  6  7]
    #  [ 8  9 10 11]]
    mean_arr = np.mean(arr, axis=0)
    print(mean_arr)
    # [ 4.  5.  6.  7.]

    # broadcasting: the 1-D column mean is subtracted from every row
    print(arr - mean_arr)
    # [[-4. -4. -4. -4.]
    #  [ 0.  0.  0.  0.]
    #  [ 4.  4.  4.  4.]]
    print(np.mean(arr))          # grand mean of all elements -> 5.5
    print(np.mean(arr, axis=1))  # per-row mean -> [ 1.5  5.5  9.5]
    return mean_arr
def log2_test():
    """
    Demo of np.log2, including log2(0) -> -inf, and a weighted log sum.

    Fixes: Python-2-only print statements ported to the function form;
    the expected divide-by-zero warning from log2(0) is silenced; the
    misleading "calculate entropy" comment is corrected.

    :return: sum(arr * log2(arr)) for the second demo array (34.0)
    """
    arr = np.array([0, 1, 2, 3, 2 ** 4])
    # log2(0) is -inf by definition; suppress the expected divide warning
    with np.errstate(divide='ignore'):
        print(np.log2(arr))
    # [-inf, 0, 1, 1.5849625, 4]

    arr = np.array([1, 2, 4, 2 ** 3])
    arr_lg = np.log2(arr)
    print(arr_lg)
    # [ 0.  1.  2.  3.]
    print(arr_lg * arr)
    # [  0.   2.   8.  24.]

    # NOTE(review): this is sum(x * log2(x)) — the *unnormalized* building
    # block of an entropy computation; entropy proper is -sum(p * log2(p))
    # over probabilities p (see cal_entropy below).
    total = np.sum(arr_lg * arr)
    print(total)
    # 34.0
    return total
class SoftmaxLayer:
    """Numerically stable softmax layer; caches its output for backward."""

    def __init__(self, name='Softmax'):
        # stateless apart from the cached forward output; the name
        # argument is accepted for API symmetry and not used
        pass

    def forward(self, in_data):
        """Row-wise softmax of in_data (one sample per row)."""
        # subtract each row's max first so exp() cannot overflow
        shifted = in_data - np.max(in_data, axis=1, keepdims=True)
        exp_scores = np.exp(shifted)
        self.top_val = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
        return self.top_val

    def backward(self, residual):
        """Gradient of softmax + cross-entropy w.r.t. the input scores.

        residual holds the true class index of each sample (evidenced by
        the fancy indexing below); the combined gradient is
        (p - one_hot(y)) averaged over the batch.
        """
        n_samples = residual.shape[0]
        grad = self.top_val.copy()
        grad[range(n_samples), list(residual)] -= 1
        grad /= n_samples
        return grad
def visualize_tree(tree, feature_name, dot_file):
    """Create a tree png using graphviz.

    tree -- fitted scikit-learn DecisionTree estimator.
    feature_name -- list of feature names.
    dot_file -- dot file name and path; the png is written next to it.
    """
    # BUG FIX: the original hard-coded open("tree.dot", ...), silently
    # ignoring dot_file — so loan_demo wrote tree.dot but converted the
    # (stale) loan.dot. Write to the requested path instead.
    with open(dot_file, 'w') as f:
        export_graphviz(tree, out_file=f,
                        feature_names=feature_name)

    # replace only the extension; str.replace('dot', 'png') would rewrite
    # every 'dot' occurrence anywhere in the path
    if dot_file.endswith('.dot'):
        dt_png = dot_file[:-4] + '.png'
    else:
        dt_png = dot_file + '.png'
    command = ["dot", "-Tpng", dot_file, "-o", dt_png]
    try:
        subprocess.check_call(command)
    except Exception as e:
        print(e)
        exit("Could not run dot, ie graphviz, to "
             "produce visualization")
def make_df(cols, idx):
    """
    Build a demo DataFrame whose cell (i, c) holds the string c + str(i).

    make_df('ABC', range(3)) returns
         A   B   C
    0   A0  B0  C0
    1   A1  B1  C1
    2   A2  B2  C2
    """
    data = {}
    for col in cols:
        data[col] = ['%s%s' % (col, i) for i in idx]

    return pd.DataFrame(data, idx)
def baseline_model():
    """
    Build and compile a small CNN for 10-class digit classification.

    Topology: Conv(32 filters, 5x5) -> MaxPool 2x2 -> Dropout 0.2
              -> Flatten -> Dense 128 relu -> Dense 10 softmax.
    The (1, 28, 28) input shape is channels-first, matching
    K.set_image_dim_ordering('th') at module import.
    :return: compiled keras Sequential model
    """
    model = Sequential()
    # 32 filters of 5x5 over a single-channel 28x28 image
    model.add(Conv2D(32, (5, 5), input_shape=(1, 28, 28), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    # drop 20% of activations during training to reduce overfitting
    model.add(Dropout(0.2))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    # 10 output units, softmax -> class probabilities
    model.add(Dense(10, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model
def coin_question(coin_val, coin_count, money):
    """
    Greedy change-making with limited coin supplies.

    Walks denominations from largest to smallest, taking as many of each
    coin as the remaining amount and the supply allow.

    :param coin_val: coin denominations in ascending order
    :param coin_count: available count of each denomination (parallel list)
    :param money: target amount
    :return: dict {denomination: coins used}; greedy, so the result may
             sum to less than `money` if the supply runs out — callers
             should verify the total when exact change is required
    """
    total = 0  # renamed from `sum`, which shadowed the builtin
    d_val_count = {}

    for i in reversed(range(len(coin_val))):
        remaining = money - total
        # BUG FIX: floor division — plain `/` is float division on
        # Python 3 and produced fractional coin counts
        n = min(coin_count[i], remaining // coin_val[i])
        if n:
            total += n * coin_val[i]
            d_val_count[coin_val[i]] = n

    return d_val_count
import GridSearchCV 7 | from sklearn.metrics import classification_report 8 | 9 | import matplotlib.pyplot as plt 10 | import seaborn as sns 11 | sns.set() 12 | 13 | 14 | def test_grid_search_cv(): 15 | iris = datasets.load_iris() 16 | parameters = {'kernel': ('linear', 'rbf'), 17 | 'C': [1, 2, 4], 'gamma': [0.125, 0.25, 0.5, 1, 2, 4]} 18 | 19 | svr = svm.SVC() 20 | clf = GridSearchCV(svr, parameters, n_jobs=-1) 21 | clf.fit(iris.data, iris.target) 22 | cv_result = pd.DataFrame.from_dict(clf.cv_results_) 23 | with open('cv_result.csv', 'w') as f: 24 | cv_result.to_csv(f) 25 | 26 | print('The parameters of the best model are: ') 27 | print(clf.best_params_) 28 | 29 | y_pred = clf.predict(iris.data) 30 | print(classification_report(y_true=iris.target, y_pred=y_pred)) 31 | 32 | 33 | def grid_search_cv_graph(): 34 | iris = datasets.load_digits() 35 | X = iris.data 36 | Y = iris.target 37 | 38 | C_lst = [1, 10, 100, 1000] 39 | gamma_lst = [0.125, 0.25, 0.5, 1, 2, 4] 40 | gamma_lst = [1e-3, 1e-4] 41 | 42 | parameters = {'C': C_lst, 'gamma': gamma_lst} 43 | 44 | # parameters = {'kernel': ('linear', 'rbf'), 45 | # 'C': C_lst, 'gamma': gamma_lst} 46 | 47 | clf_ = svm.SVC() 48 | clf = GridSearchCV(clf_, parameters, cv=2, n_jobs=-1) 49 | clf.fit(X, Y) 50 | 51 | print clf.best_params_ 52 | print clf.best_score_ 53 | 54 | print clf.cv_results_ 55 | 56 | # scores = [x[1] for x in clf.grid_scores_] 57 | scores = clf.cv_results_['mean_test_score'] 58 | print scores 59 | scores = np.array(scores).reshape(len(C_lst), len(gamma_lst)) 60 | 61 | for ind, i in enumerate(C_lst): 62 | plt.plot(gamma_lst, scores[ind], label='C: ' + str(i)) 63 | 64 | plt.legend() 65 | plt.xlabel('Gamma') 66 | plt.ylabel('Mean score') 67 | plt.show() 68 | 69 | if __name__ == '__main__': 70 | # test_grid_search_cv() 71 | grid_search_cv_graph() 72 | pass 73 | -------------------------------------------------------------------------------- /python_utils/http_basic/http_client_get.py: 
-------------------------------------------------------------------------------- 1 | # _*_ coding:utf-8 _*_ 2 | 3 | """ 4 | This file a sample demo to do http stress test 5 | """ 6 | import requests 7 | import time 8 | from multiprocessing.dummy import Pool as ThreadPool 9 | import urllib 10 | 11 | 12 | def get_ret_from_http(url): 13 | """cited from https://stackoverflow.com/questions/645312/what-is-the-quickest-way-to-http-get-in-python 14 | """ 15 | ret = requests.get(url) 16 | print ret.content 17 | # eg. result: {"error":false,"resultMap":{"check_ret":1},"success":true} 18 | 19 | 20 | def multi_process_stress_test(): 21 | """ 22 | start up 4 thread to issue 1000 http requests to server 23 | and test consume time 24 | :return: 25 | """ 26 | start = time.time() 27 | url = """http://127.0.0.1:9325/shortvideo/checkBlack?url=http%3A%2F%2Fzbasecapture.bs2.yy.com%2F42269159_1499248536403_3.jpg&serial=abcdddddddd""" 28 | url1 = """http://127.0.0.1:9325/shortvideo/checkBlack?url=http%3A%2F%2Fgenie.bs2dl.yy.com%2Ff4955aa1ab1c479256e2a2c5cdec73a6&serial=abceeeeeeee""" 29 | lst_url = [url, url1]*50 30 | pool = ThreadPool(5) 31 | ret = pool.map(get_ret_from_http, lst_url) 32 | pool.close() 33 | pool.join() 34 | print 'time consume %s' % (time.time() - start) 35 | 36 | 37 | def make_url(): 38 | """ 39 | generate url with parameter 40 | https://xy.com/index.php?url=http%3A//xy.xxx.com/22.jpg&SecretId=xy_123_move 41 | cited from https://stackoverflow.com/questions/2506379/add-params-to-given-url-in-python 42 | https://github.com/gruns/furl a good util for url operator 43 | :return: 44 | """ 45 | para = {"SecretId": "xy_123_move", "url": "http://xy.xxx.com/22.jpg"} 46 | 47 | print urllib.urlencode(para) 48 | # url=http%3A%2F%2Fxy.xxx.com%2F22.jpg&SecretId=xy_123_move 49 | 50 | base_url = 'xy.com/index.php' 51 | 52 | # 记得 下面的是 ? 
class Worker(threading.Thread):
    """Daemon thread that forever pulls (func, args, kwargs) work items
    off a shared Queue and executes them.

    The thread starts itself from __init__ and never leaves its run()
    loop; it terminates only because it is a daemon thread and dies with
    the process.
    """

    def __init__(self, task):
        # `task` is the shared Queue of pending work items.
        super(Worker, self).__init__()
        self.task = task
        self.daemon = True  # if don't set that, then the thread won't stop automatically
        self.start()

    def run(self):
        # Consume tasks forever; Queue.get() blocks while the queue is empty.
        while True:
            logging.debug('waiting for queue')
            func, args, kargs = self.task.get()
            try:
                logging.debug('now I am going to do task')
                func(*args, **kargs)
            except Exception, e:
                # Swallow the error so one bad task does not kill the
                # worker, but log it for visibility (Python 2 syntax).
                logging.warn(e)
            finally:
                # Always mark the task done so Queue.join() can unblock.
                self.task.task_done()
def idx_align_series():
    """Demo: pandas Series arithmetic aligns on the index; labels present
    in only one operand produce NaN unless a fill_value is supplied."""
    A = pd.Series([2, 4, 6], index=[0, 1, 2])
    B = pd.Series([1, 3, 5], index=[1, 2, 3])

    # only labels 1 and 2 exist in both -> labels 0 and 3 become NaN
    print A + B
    # 0    NaN
    # 1    5.0
    # 2    9.0
    # 3    NaN
    # dtype: float64

    # treat a missing label as 0 instead of propagating NaN
    print A.add(B, fill_value=0)
    # 0    2.0
    # 1    5.0
    # 2    9.0
    # 3    5.0
    # dtype: float64
[ 1 1 -4 3]] 75 | df = pd.DataFrame(arr, columns=list('QRST')) 76 | print df - df.iloc[0] 77 | # Q R S T 78 | # 0 0 0 0 0 79 | # 1 0 6 -5 2 80 | # 2 1 1 -4 3 81 | print df.subtract(df['R'], axis=0) 82 | # Q R S T 83 | # 0 3 0 4 1 84 | # 1 -3 0 -7 -3 85 | # 2 3 0 -1 3 86 | 87 | print df # no change 88 | 89 | if __name__ == '__main__': 90 | row_col_ope() 91 | # idx_align_df() 92 | # idx_align_series() 93 | pass 94 | -------------------------------------------------------------------------------- /python_utils/machine_learn/knearest/knn_classify_sklearn.py: -------------------------------------------------------------------------------- 1 | # _*_ coding:utf-8 _*_ 2 | 3 | """ 4 | use sklearn to do knn prediction 5 | 6 | data set from: https://archive.ics.uci.edu/ml/datasets/Iris 7 | mainly cited from the below blog: 8 | https://kevinzakka.github.io/2016/07/13/k-nearest-neighbor/ 9 | """ 10 | 11 | import numpy as np 12 | from sklearn.metrics import accuracy_score 13 | from sklearn.neighbors import KNeighborsClassifier 14 | from sklearn.model_selection import train_test_split, cross_val_score 15 | import pandas as pd 16 | import matplotlib.pyplot as plt 17 | 18 | 19 | def load_data(): 20 | names = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'class'] 21 | # loading training data 22 | path = '../dataset/knn/iris_data.txt' 23 | df = pd.read_csv(path, header=None, names=names) 24 | # print df.head() 25 | x = np.array(df.ix[:, 0: 4]) 26 | y = np.array(df['class']) 27 | 28 | print x.shape, y.shape 29 | # x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.33, random_state=40) 30 | return train_test_split(x, y, test_size=0.33, random_state=40) 31 | 32 | 33 | def predict(): 34 | x_train, x_test, y_train, y_test = load_data() 35 | k = 3 36 | knn = KNeighborsClassifier(n_neighbors=k) 37 | knn.fit(x_train, y_train) 38 | pred = knn.predict(x_test) 39 | print accuracy_score(y_test, pred) 40 | 41 | 42 | def cross_validation(): 43 | x_train, x_test, 
def img_base64():
    """Demo: read an image file and base64-encode its bytes, e.g. to embed
    the image directly in an HTTP request instead of passing a URL.

    NOTE(review): the hard-coded Windows path only exists on the author's
    machine.
    """
    img_path = 'F:/img_test/dl_img_text_recognition/online_1.jpg'
    with open(img_path, 'rb') as img_file:
        b64_str = base64.b64encode(img_file.read())
    print len(b64_str)
    # 55932
    print b64_str
    # /9j/4AAQSkZ.............
def base64_exception():
    """Demo: decoding a non-base64 string raises binascii.Error -- catch
    that specific exception rather than a bare Exception."""
    s_non_b64 = 'not base64 str 123 456 '
    try:
        print base64.decodestring(s_non_b64)
    # except Exception as e:
    except binascii.Error as e:
        # you'd better catch exception -- and the narrowest one possible
        print "base64 decode error %s " % e
def pd_dummy_val_2():
    """Demo: one-hot encode the `day` column with pd.get_dummies and join
    the indicator columns back onto the original frame."""
    raw_data = {"work_hour": [9, 9, 9, 9, 9, 9, 6],
                "day": ["mon", "tus", "wend", "thur", "fri", "sta", "sun"]}

    df = pd.DataFrame(raw_data, columns=['work_hour', 'day'])

    print df
    #    work_hour   day
    # 0          9   mon
    # 1          9   tus
    # 2          9  wend
    # 3          9  thur
    # 4          9   fri
    # 5          9   sta
    # 6          6   sun

    # one 0/1 indicator column per distinct value of `day`
    df_day = pd.get_dummies(df['day'])

    # column-wise concat: original frame + dummy columns
    df_new = pd.concat([df, df_day], axis=1)

    print df_new
    #    work_hour   day  fri  mon  sta  sun  thur  tus  wend
    # 0          9   mon  0.0  1.0  0.0  0.0   0.0  0.0   0.0
    # 1          9   tus  0.0  0.0  0.0  0.0   0.0  1.0   0.0
    # 2          9  wend  0.0  0.0  0.0  0.0   0.0  0.0   1.0
    # 3          9  thur  0.0  0.0  0.0  0.0   1.0  0.0   0.0
    # 4          9   fri  1.0  0.0  0.0  0.0   0.0  0.0   0.0
    # 5          9   sta  0.0  0.0  1.0  0.0   0.0  0.0   0.0
    # 6          6   sun  0.0  0.0  0.0  1.0   0.0  0.0   0.0
class RequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):
    """Serve GET requests by running them through a chain of case objects.

    Each case exposes test(handler) / act(handler); the first case whose
    test() returns True handles the request (chain-of-responsibility).
    """

    CasesLst = [CaseNoFile, CaseExistFile, CaseError]

    # NOTE(review): the original HTML markup of this template was lost in
    # extraction; reconstructed in the conventional form for this handler.
    # {path} and {msg} are substituted by .format() in handle_error().
    Error_Page = """\
<html>
<body>
<h1>Error accessing {path}</h1>
<p>{msg}</p>
</body>
</html>
"""

    def do_GET(self):
        """Resolve the request path and dispatch to the first matching case."""
        try:
            self.full_path = os.getcwd() + self.path
            for case in self.CasesLst:
                if case.test(self):
                    case.act(self)
                    break

        except Exception as msg:
            self.handle_error(msg)

    def handle_file(self, path):
        """Send the raw bytes of `path` back to the client."""
        try:
            with open(path, 'rb') as reader:
                content = reader.read()
            self.send_content(content)
        except IOError as msg:
            msg = "'{0}' cannot be read: {1}".format(self.path, msg)
            self.handle_error(msg)

    def handle_error(self, msg):
        """Render the error page for `msg`.

        BUG FIX: errors used to be sent with HTTP 200; report 404 instead
        so clients can distinguish failure from success.
        """
        content = self.Error_Page.format(path=self.path, msg=msg)
        self.send_content(content, status=404)

    def send_content(self, content, status=200):
        """Write `content` with the given HTTP status (default 200 keeps
        the original call sites working unchanged)."""
        self.send_response(status)
        self.send_header("Content-type", "text/html")
        self.send_header("Content-Length", str(len(content)))
        self.end_headers()
        self.wfile.write(content)
def assign_val():
    """Demo: NumPy slice assignment broadcasts a scalar, or copies a
    sequence of matching length, into the selected elements."""
    arr = np.arange(10)
    print arr
    # [0 1 2 3 4 5 6 7 8 9]
    # a scalar is broadcast across the whole slice
    arr[2:7] = 1
    print arr
    # [0 1 1 1 1 1 1 7 8 9]
    # a sequence of the same length is copied element-wise
    arr[2:7] = range(5)
    print arr
    # [0 1 0 1 2 3 4 7 8 9]
class CounterThreadSafe(threading.Thread):
    """Counter whose increments are serialized by a threading.Lock so it
    can be shared between threads.

    NOTE(review): the class inherits threading.Thread but is never
    start()ed by the callers below -- the base class is kept only to
    preserve the public interface.
    """

    def __init__(self, start=0):
        """:param start: initial counter value (default 0)"""
        super(CounterThreadSafe, self).__init__()
        self.val = start
        self.lock = threading.Lock()

    def inc(self, num):
        """Add `num` to the counter under the lock (explicit acquire/release)."""
        logging.debug('wanting for lock, before num is %s val is %s', num, self.val)
        # BUG FIX: acquire() used to sit *inside* the try block, so any
        # failure before the lock was actually taken would still run
        # release() in the finally clause -- raising on an unheld lock.
        self.lock.acquire()
        try:
            self.val += num
            logging.debug('after counter val is %s', self.val)
        finally:
            self.lock.release()

    def inc_v2(self, num):
        """Same as inc(), but uses the lock as a context manager."""
        logging.debug('wanting for lock, before num is %s val is %s', num, self.val)
        with self.lock:
            self.val += num
            logging.debug('after counter val is %s', self.val)
| -------------------------------------------------------------------------------- /python_utils/machine_learn/dataset/perception/dataset.txt: -------------------------------------------------------------------------------- 1 | -0.017612 14.053064 0 2 | -1.395634 4.662541 1 3 | -0.752157 6.538620 0 4 | -1.322371 7.152853 0 5 | 0.423363 11.054677 0 6 | 0.406704 7.067335 1 7 | 0.667394 12.741452 0 8 | -2.460150 6.866805 1 9 | 0.569411 9.548755 0 10 | -0.026632 10.427743 0 11 | 0.850433 6.920334 1 12 | 1.347183 13.175500 0 13 | 1.176813 3.167020 1 14 | -1.781871 9.097953 0 15 | -0.566606 5.749003 1 16 | 0.931635 1.589505 1 17 | -0.024205 6.151823 1 18 | -0.036453 2.690988 1 19 | -0.196949 0.444165 1 20 | 1.014459 5.754399 1 21 | 1.985298 3.230619 1 22 | -1.693453 -0.557540 1 23 | -0.576525 11.778922 0 24 | -0.346811 -1.678730 1 25 | -2.124484 2.672471 1 26 | 1.217916 9.597015 0 27 | -0.733928 9.098687 0 28 | -3.642001 -1.618087 1 29 | 0.315985 3.523953 1 30 | 1.416614 9.619232 0 31 | -0.386323 3.989286 1 32 | 0.556921 8.294984 1 33 | 1.224863 11.587360 0 34 | -1.347803 -2.406051 1 35 | 1.196604 4.951851 1 36 | 0.275221 9.543647 0 37 | 0.470575 9.332488 0 38 | -1.889567 9.542662 0 39 | -1.527893 12.150579 0 40 | -1.185247 11.309318 0 41 | -0.445678 3.297303 1 42 | 1.042222 6.105155 1 43 | -0.618787 10.320986 0 44 | 1.152083 0.548467 1 45 | 0.828534 2.676045 1 46 | -1.237728 10.549033 0 47 | -0.683565 -2.166125 1 48 | 0.229456 5.921938 1 49 | -0.959885 11.555336 0 50 | 0.492911 10.993324 0 51 | 0.184992 8.721488 0 52 | -0.355715 10.325976 0 53 | -0.397822 8.058397 0 54 | 0.824839 13.730343 0 55 | 1.507278 5.027866 1 56 | 0.099671 6.835839 1 57 | -0.344008 10.717485 0 58 | 1.785928 7.718645 1 59 | -0.918801 11.560217 0 60 | -0.364009 4.747300 1 61 | -0.841722 4.119083 1 62 | 0.490426 1.960539 1 63 | -0.007194 9.075792 0 64 | 0.356107 12.447863 0 65 | 0.342578 12.281162 0 66 | -0.810823 -1.466018 1 67 | 2.530777 6.476801 1 68 | 1.296683 11.607559 0 69 | 0.475487 12.040035 
0 70 | -0.783277 11.009725 0 71 | 0.074798 11.023650 0 72 | -1.337472 0.468339 1 73 | -0.102781 13.763651 0 74 | -0.147324 2.874846 1 75 | 0.518389 9.887035 0 76 | 1.015399 7.571882 0 77 | -1.658086 -0.027255 1 78 | 1.319944 2.171228 1 79 | 2.056216 5.019981 1 80 | -0.851633 4.375691 1 81 | -1.510047 6.061992 0 82 | -1.076637 -3.181888 1 83 | 1.821096 10.283990 0 84 | 3.010150 8.401766 1 85 | -1.099458 1.688274 1 86 | -0.834872 -1.733869 1 87 | -0.846637 3.849075 1 88 | 1.400102 12.628781 0 89 | 1.752842 5.468166 1 90 | 0.078557 0.059736 1 91 | 0.089392 -0.715300 1 92 | 1.825662 12.693808 0 93 | 0.197445 9.744638 0 94 | 0.126117 0.922311 1 95 | -0.679797 1.220530 1 96 | 0.677983 2.556666 1 97 | 0.761349 10.693862 0 98 | -2.168791 0.143632 1 99 | 1.388610 9.341997 0 100 | 0.317029 14.739025 0 -------------------------------------------------------------------------------- /python_utils/machine_learn/logistic_regression/lr_scratch.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | 3 | import numpy as np 4 | import pandas as pd 5 | 6 | 7 | class LogisticRegression(object): 8 | def __init__(self): 9 | self._map_method() 10 | pass 11 | 12 | def _map_method(self): 13 | self._do_train = {"gd": self._gd, "sgd": self._sgd} 14 | 15 | def _sigmoid(self, x): 16 | return 1.0 / (1 + np.exp(-x)) 17 | 18 | def fit(self, X, Y, **opt): 19 | m, n = X.shape 20 | self._weight = np.ones((n, 1)) 21 | max_iter = opt.get("max_iter", 100) 22 | alpha = opt.get("alpha", 0.01) 23 | method = opt.get("method", "sgd") 24 | 25 | for k in xrange(max_iter): 26 | try: 27 | self._do_train[method](X, Y, alpha) 28 | 29 | print "iter %s error rate %s" % (k, self._get_error_rate(X, Y)) 30 | except KeyError: 31 | raise ValueError('method error') 32 | 33 | def _sgd(self, X, Y, alpha): 34 | """stochastic gradient descent""" 35 | m, n = X.shape 36 | for i in xrange(m): 37 | # pred = self._sigmoid(X[i, :] * self._weight) 38 | pred = 
def inc(x):
    """Closure demo: build and return a function that adds `x` to its
    single argument."""
    def add_x(amount):
        return amount + x

    return add_x
def f(a, b, c, d):
    """Combine four decimal digits into one number, i.e.
    a*1000 + b*100 + c*10 + d.

    Used by partial_demo to illustrate functools.partial.
    :param a: int
    :param b: int
    :param c: int
    :param d: int
    :return: int
    """
    # Horner form of a*1000 + b*100 + c*10 + d
    return ((a * 10 + b) * 10 + c) * 10 + d
def print_keyword_args(**kwargs):
    """Print every keyword argument as 'name = value', one per line."""
    # kwargs is a dict of the keyword args passed to the function
    for key, value in kwargs.iteritems():
        print "%s = %s" % (key, value)
class ExampleClass:
    """Demonstrate the common ways of reading **kwargs, with and
    without default values."""

    def __init__(self, **kwargs):
        opts = kwargs
        # mandatory key: plain indexing raises KeyError when it is missing
        self.val = opts['val']
        # optional key: get() yields None when absent...
        self.val2 = opts.get('val2')
        # ...or an explicit fallback value
        self.val3 = opts.get('val3', 'default_val3')
        # pop() behaves like get()-with-default but also removes the key
        self.val4 = opts.pop('val4', 'default_val4')
""":return size num str(in 'A~z, 0-9') 16 | eg. size=6 return 'ad14df' 17 | [random.choice('abcde') for _ in range(3)] -> ['a', 'b', 'b'] 18 | ''.join(['a', 'b', 'b']) -> 'abb' 19 | """ 20 | return ''.join(random.choice(str_source) for _ in xrange(size)) 21 | 22 | 23 | def str_split(): 24 | s = 'python_worker_name&topSid_111_appid_111&topSid_222_appid_222' 25 | print s[0:s.find('&')] 26 | print s.split('&') 27 | print s.split('&')[1:] 28 | 29 | 30 | def remove_sub_str(): 31 | src = 'channel_1' 32 | sub_s = 'chan' 33 | print src[src.find(sub_s):] 34 | print src.find(sub_s) 35 | print src.replace(sub_s, '') 36 | 37 | 38 | def str_format_once(): 39 | query = """insert into {tb_name} (create_time, appid) VALUES (%s,%s)""" 40 | tb_name = 'tb_audio_rec_ret_2017_11' 41 | # query % tb_name error 42 | print query.format(tb_name=tb_name) 43 | # insert into tb_audio_rec_ret_2017_11 (create_time, appid) VALUES (%s,%s) 44 | 45 | 46 | def str_replace(): 47 | import time 48 | s = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) 49 | print s 50 | # 2017-11-09 17:26:34 51 | print s[0:7].replace('-', '_') 52 | # 2017_11 53 | 54 | 55 | def char_2int_2char(): 56 | print ord('a') 57 | # 97 58 | print chr(97) 59 | # a 60 | 61 | 62 | def fill_zero(): 63 | s_num = 11 64 | print str(s_num).zfill(5) 65 | # 00011 66 | 67 | 68 | if __name__ == '__main__': 69 | print fill_zero() 70 | # char_2int_2char() 71 | # str_replace() 72 | # str_format_once() 73 | # remove_sub_str() 74 | # str_split() 75 | # str_format() 76 | # print generator_random_str() 77 | # print generator_random_str(3, 'abc123') 78 | # s = '123' 79 | # if s.find("12") == -1: 80 | # print 'no no ' 81 | 82 | b = 0 83 | b = None 84 | # if b is not zero not None(like -1, 1) it will print 85 | if b: 86 | print '%s not zero' % b 87 | 88 | # url = 'bear fish.com' 89 | # if url.endswith('.com'): 90 | # url = url[:-4] 91 | # print url 92 | 93 | url = 'www.myzaker.com/article/58daf1b69490cbe53400001b/' 94 | # if 'aa' in url: 95 | # 
print '1' 96 | # elif 'comp' in url: 97 | # print '2' 98 | # else: 99 | # print '3' 100 | # print url.find('myzaker') 101 | # print url.find('www.myzaker') 102 | # print url.find('http') 103 | 104 | # print s[1:] 105 | # print s[:] 106 | # print s[:2] 107 | pass 108 | -------------------------------------------------------------------------------- /python_utils/py_basic/set_ope.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | python unique list -> set usage 4 | """ 5 | 6 | 7 | def define_set(): 8 | """2 ways of defining set""" 9 | set_1 = set([1, 2, 3]) 10 | print type(set_1) 11 | print set_1 12 | 13 | set_2 = {2, 3, 2} 14 | print type(set_2) 15 | # 16 | print set_2 17 | # set([2, 3]) 18 | 19 | a = set((1, 2, 3, 4)) 20 | b = set([3, 4, 5, 6]) 21 | print a | b # Union 22 | # {1, 2, 3, 4, 5, 6} 23 | print a & b # Intersection 24 | # {3, 4} 25 | print a < b # Subset 26 | # False 27 | print a - b # Difference 28 | # {1, 2} 29 | print a ^ b # Symmetric Difference 30 | # {1, 2, 5, 6} 31 | 32 | 33 | def set_basic_usage(): 34 | s1 = set() 35 | 36 | s1.add('abc') 37 | s1.add('abc') 38 | s1.add(123) 39 | s1.add(777) 40 | print (s1) 41 | 42 | if 123 in s1: 43 | print ' find it and remove it' 44 | s1.remove(123) 45 | print s1 46 | 47 | 48 | def dict_val_set(): 49 | dic_val_set = {} 50 | dic_val_set['abc'] = set([123]) 51 | dic_val_set['abc'].add(456) 52 | dic_val_set['abc'].add(123) 53 | print dic_val_set 54 | # {'abc': set([456, 123])} 55 | dic_val_set['ddd'] = set() 56 | dic_val_set['ddd'].add(123) 57 | 58 | for k in dic_val_set.keys(): 59 | if 123 in dic_val_set[k]: 60 | print dic_val_set[k] 61 | 62 | 63 | def set_remove(): 64 | # s_src = {1, 3, 5, 7} 65 | s_src = {1} 66 | # s2 = {1, 3, 2} 67 | s2 = [1, 3, 2] 68 | # raise error 69 | # print s_src.remove(*s2) 70 | try: 71 | s_src.remove(*s2) 72 | except Exception as e: 73 | print e 74 | # print s_src 75 | print s_src - s2 76 | 77 | # print s_src | s2 
78 | # set([1, 2, 3, 5, 7]) 79 | # print s_src & s2 80 | # set([1, 3]) 81 | 82 | 83 | def set_lst(): 84 | s1 = {1, 2, 3} 85 | lst_1 = [] 86 | # set to list 87 | lst_1 += s1 88 | 89 | print s1 90 | print lst_1 91 | 92 | 93 | def dict_key_to_set(): 94 | d = {'111': 1, 'aaa': 111} 95 | s1 = set(d.keys()) 96 | print s1 97 | # set(['111', 'aaa']) 98 | 99 | s2 = {'111', 111} 100 | print s1 & s2 101 | 102 | 103 | def set_diff(): 104 | s1 = {1, 3} 105 | s2 = {1, 2, 4} 106 | 107 | print s1 - s2 108 | print s1.difference(s2) 109 | # set([3]) 110 | 111 | 112 | def set_hash(): 113 | lst = [1, 555, 372, 6, 6, 372, 222] 114 | h_set = set(lst) 115 | print h_set # unordered 116 | # set([1, 555, 372, 222, 6]) 117 | 118 | if __name__ == '__main__': 119 | set_hash() 120 | # set_diff() 121 | # dict_key_to_set() 122 | # set_lst() 123 | # set_remove() 124 | # define_set() 125 | # dict_val_set() 126 | # set_basic_usage() 127 | # print min(3, 4, -1) 128 | # import time 129 | # import random 130 | # timestamp = int(time.time()) 131 | # print random.randint(0, 1000000) + timestamp 132 | pass 133 | -------------------------------------------------------------------------------- /python_utils/al_lt_common/al_str.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | 3 | 4 | def max_unique_substr(src): 5 | LEN = len(src) 6 | 7 | lst_ret = [] 8 | cur_max = 0 9 | s_tmp = '' 10 | for i in xrange(LEN): 11 | if src[i] not in s_tmp: 12 | s_tmp += src[i] 13 | cur_max += 1 14 | if len(lst_ret): 15 | if len(lst_ret[0]) < cur_max: 16 | lst_ret = [s_tmp] 17 | else: 18 | lst_ret.append(s_tmp) 19 | else: 20 | idx = s_tmp.find(src[i]) 21 | s_tmp = s_tmp[idx + 1:] + src[i] 22 | cur_max = len(s_tmp) 23 | if len(lst_ret[0]) == cur_max and s_tmp not in lst_ret: 24 | lst_ret.append(s_tmp) 25 | 26 | return lst_ret 27 | 28 | 29 | def max_unique_substr_len(src): 30 | char_last_idx = [-1] * 256 31 | b, e, cur_max, max_len = -1, -1, 0, 0 32 | 33 | for i in 
xrange(len(src)): 34 | char_idx = ord(src[i]) 35 | last_idx = char_last_idx[char_idx] 36 | if last_idx == -1 or last_idx > e or last_idx < b: 37 | char_last_idx[char_idx] = i 38 | e += 1 39 | cur_max += 1 40 | if cur_max > max_len: 41 | max_len = cur_max 42 | else: 43 | e = i 44 | b = last_idx + 1 45 | cur_max = e - b + 1 46 | char_last_idx[char_idx] = i 47 | 48 | return max_len 49 | 50 | 51 | def test_max_unique_substr(): 52 | s1 = 'abdefgabef' 53 | print max_unique_substr(s1) 54 | # ['abdefg', 'bdefga', 'defgab'] 55 | s1 = 'bbbb' 56 | print max_unique_substr(s1) 57 | # ['b'] 58 | s1 = 'geeksforgeeks' 59 | print max_unique_substr(s1) 60 | # ['eksforg', 'ksforge'] 61 | s1 = 'qwertqwer' 62 | print max_unique_substr(s1) 63 | 64 | s1 = 'abdefgabef' 65 | print max_unique_substr_len(s1) 66 | 67 | s1 = 'abcd' 68 | print max_unique_substr_len(s1) 69 | 70 | s1 = 'bbbb' 71 | print max_unique_substr_len(s1) 72 | 73 | s1 = 'geeksforgeeks' 74 | print max_unique_substr_len(s1) 75 | 76 | s1 = 'qwertqwer' 77 | print max_unique_substr_len(s1) 78 | 79 | 80 | def print_lst_str(lst_s): 81 | print "".join(lst_s) 82 | 83 | 84 | def str_permute(lst_s, b, e): 85 | if b == e: 86 | print_lst_str(lst_s) 87 | 88 | for i in xrange(b, e + 1): 89 | lst_s[b], lst_s[i] = lst_s[i], lst_s[b] 90 | # str_permute(lst_s, i + 1, e) 91 | str_permute(lst_s, b + 1, e) 92 | lst_s[i], lst_s[b] = lst_s[b], lst_s[i] 93 | 94 | 95 | def print_permutation_str(str): 96 | n, lst_s = len(str), list(str) 97 | str_permute(lst_s, 0, n - 1) 98 | 99 | 100 | def test_pps(): 101 | s = 'abc' 102 | print_permutation_str(s) 103 | s = 'abcd' 104 | print_permutation_str(s) 105 | 106 | 107 | if __name__ == '__main__': 108 | # test_pps() 109 | # test_max_unique_substr() 110 | 111 | print list('abc') 112 | # ['a', 'b', 'c'] 113 | pass 114 | -------------------------------------------------------------------------------- /python_utils/machine_learn/dataset/logistic_regression/lr_ml_action.txt: 
-------------------------------------------------------------------------------- 1 | -0.017612 14.053064 0 2 | -1.395634 4.662541 1 3 | -0.752157 6.538620 0 4 | -1.322371 7.152853 0 5 | 0.423363 11.054677 0 6 | 0.406704 7.067335 1 7 | 0.667394 12.741452 0 8 | -2.460150 6.866805 1 9 | 0.569411 9.548755 0 10 | -0.026632 10.427743 0 11 | 0.850433 6.920334 1 12 | 1.347183 13.175500 0 13 | 1.176813 3.167020 1 14 | -1.781871 9.097953 0 15 | -0.566606 5.749003 1 16 | 0.931635 1.589505 1 17 | -0.024205 6.151823 1 18 | -0.036453 2.690988 1 19 | -0.196949 0.444165 1 20 | 1.014459 5.754399 1 21 | 1.985298 3.230619 1 22 | -1.693453 -0.557540 1 23 | -0.576525 11.778922 0 24 | -0.346811 -1.678730 1 25 | -2.124484 2.672471 1 26 | 1.217916 9.597015 0 27 | -0.733928 9.098687 0 28 | -3.642001 -1.618087 1 29 | 0.315985 3.523953 1 30 | 1.416614 9.619232 0 31 | -0.386323 3.989286 1 32 | 0.556921 8.294984 1 33 | 1.224863 11.587360 0 34 | -1.347803 -2.406051 1 35 | 1.196604 4.951851 1 36 | 0.275221 9.543647 0 37 | 0.470575 9.332488 0 38 | -1.889567 9.542662 0 39 | -1.527893 12.150579 0 40 | -1.185247 11.309318 0 41 | -0.445678 3.297303 1 42 | 1.042222 6.105155 1 43 | -0.618787 10.320986 0 44 | 1.152083 0.548467 1 45 | 0.828534 2.676045 1 46 | -1.237728 10.549033 0 47 | -0.683565 -2.166125 1 48 | 0.229456 5.921938 1 49 | -0.959885 11.555336 0 50 | 0.492911 10.993324 0 51 | 0.184992 8.721488 0 52 | -0.355715 10.325976 0 53 | -0.397822 8.058397 0 54 | 0.824839 13.730343 0 55 | 1.507278 5.027866 1 56 | 0.099671 6.835839 1 57 | -0.344008 10.717485 0 58 | 1.785928 7.718645 1 59 | -0.918801 11.560217 0 60 | -0.364009 4.747300 1 61 | -0.841722 4.119083 1 62 | 0.490426 1.960539 1 63 | -0.007194 9.075792 0 64 | 0.356107 12.447863 0 65 | 0.342578 12.281162 0 66 | -0.810823 -1.466018 1 67 | 2.530777 6.476801 1 68 | 1.296683 11.607559 0 69 | 0.475487 12.040035 0 70 | -0.783277 11.009725 0 71 | 0.074798 11.023650 0 72 | -1.337472 0.468339 1 73 | -0.102781 13.763651 0 74 | -0.147324 2.874846 1 75 | 
0.518389 9.887035 0 76 | 1.015399 7.571882 0 77 | -1.658086 -0.027255 1 78 | 1.319944 2.171228 1 79 | 2.056216 5.019981 1 80 | -0.851633 4.375691 1 81 | -1.510047 6.061992 0 82 | -1.076637 -3.181888 1 83 | 1.821096 10.283990 0 84 | 3.010150 8.401766 1 85 | -1.099458 1.688274 1 86 | -0.834872 -1.733869 1 87 | -0.846637 3.849075 1 88 | 1.400102 12.628781 0 89 | 1.752842 5.468166 1 90 | 0.078557 0.059736 1 91 | 0.089392 -0.715300 1 92 | 1.825662 12.693808 0 93 | 0.197445 9.744638 0 94 | 0.126117 0.922311 1 95 | -0.679797 1.220530 1 96 | 0.677983 2.556666 1 97 | 0.761349 10.693862 0 98 | -2.168791 0.143632 1 99 | 1.388610 9.341997 0 100 | 0.317029 14.739025 0 -------------------------------------------------------------------------------- /python_utils/sk_sc_pd_operator/pd_date_time.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | 3 | import numpy as np 4 | import pandas as pd 5 | import matplotlib.pyplot as plt 6 | import seaborn 7 | seaborn.set() 8 | 9 | 10 | def np_dt(): 11 | date = np.array('2017-09-28', dtype=np.datetime64) 12 | print date + np.arange(5) 13 | # ['2017-09-28' '2017-09-29' '2017-09-30' '2017-10-01' '2017-10-02'] 14 | 15 | 16 | def series_dt(): 17 | index = pd.DatetimeIndex(['2014-07-04', '2014-08-04', 18 | '2015-07-04', '2015-08-04']) 19 | data = pd.Series([0, 1, 2, 3], index=index) 20 | print data 21 | # 2014-07-04 0 22 | # 2014-08-04 1 23 | # 2015-07-04 2 24 | # 2015-08-04 3 25 | # dtype: int64 26 | 27 | print data['2014-07-04':'2014-09-04'] 28 | # 2014-07-04 0 29 | # 2014-08-04 1 30 | # dtype: int64 31 | 32 | print data['2015'] 33 | # 2015-07-04 2 34 | # 2015-08-04 3 35 | # dtype: int64 36 | 37 | 38 | def pd_time(): 39 | from datetime import datetime 40 | dates = pd.to_datetime([datetime(2015, 7, 3), '4th of July, 2015', 41 | '2015-Jul-6', '07-07-2015', '20150708']) 42 | 43 | print dates 44 | 45 | print dates.to_period('D') 46 | # PeriodIndex(['2015-07-03', '2015-07-04', '2015-07-06', 
'2015-07-07', 47 | # '2015-07-08'], 48 | # dtype='int64', freq='D') 49 | print dates - dates[0] 50 | # TimedeltaIndex(['0 days', '1 days', '3 days', 51 | # '4 days', '5 days'], 52 | # dtype='timedelta64[ns]', freq=None) 53 | 54 | print pd.date_range('2015-07-03', '2015-07-5') 55 | # DatetimeIndex(['2015-07-03', '2015-07-04', '2015-07-05'], 56 | # dtype='datetime64[ns]', freq='D') 57 | print pd.date_range('2015-07-03', periods=3) 58 | # as above 59 | 60 | print pd.date_range('2015-07-03', periods=3, freq='H') 61 | # DatetimeIndex(['2015-07-03 00:00:00', '2015-07-03 01:00:00', 62 | # '2015-07-03 02:00:00'], 63 | # dtype='datetime64[ns]', freq='H') 64 | 65 | print pd.period_range('2015-07', periods=3, freq='M') 66 | # PeriodIndex(['2015-07', '2015-08', '2015-09'], 67 | # dtype='int64', freq='M') 68 | 69 | 70 | def pd_time_offset(): 71 | from pandas.tseries.offsets import BDay 72 | 73 | print pd.timedelta_range(0, periods=3, freq="2H30T") 74 | # TimedeltaIndex(['00:00:00', '02:30:00', '05:00:00'], 75 | # dtype='timedelta64[ns]', freq='150T') 76 | print pd.date_range('2015-07-01', periods=3, freq=BDay()) 77 | # DatetimeIndex(['2015-07-01', '2015-07-02', '2015-07-03'], 78 | # dtype='datetime64[ns]', freq='B') 79 | 80 | 81 | def pandas_datareader_1(): 82 | from pandas_datareader import data 83 | goog = data.DataReader('GOOG', start='2004', end='2016', 84 | data_source='google') 85 | 86 | print goog.head() 87 | 88 | goog = goog['Close'] 89 | goog.plot() 90 | 91 | 92 | if __name__ == '__main__': 93 | pandas_datareader_1() 94 | # pd_time_offset() 95 | # pd_time() 96 | # series_dt() 97 | # np_dt() 98 | pass 99 | -------------------------------------------------------------------------------- /python_utils/py_basic/time_ope.py: -------------------------------------------------------------------------------- 1 | # _*_ coding:utf-8 _*_ 2 | 3 | """ 4 | get the seconds since epoch from the time + date output 5 | function time consume 6 | 
https://stackoverflow.com/questions/5478351/python-time-measure-function 7 | """ 8 | 9 | import time 10 | import timeit 11 | 12 | 13 | def test_time_consume(): 14 | start = time.clock() 15 | time.sleep(1) 16 | print time.clock() - start 17 | # 0.999735009203 18 | 19 | 20 | def timing(f): 21 | def wrap(*args): 22 | start = time.time() 23 | ret = f(*args) 24 | end = time.time() 25 | print '%s function took %0.3f ms' % (f.func_name, (end - start) * 1000.0) 26 | return ret 27 | return wrap 28 | 29 | 30 | @timing 31 | def test_time(): 32 | time.sleep(1.1) 33 | # test_time function took 1101.000 ms 34 | 35 | 36 | def timeit_test(): 37 | timeit.timeit() 38 | 39 | 40 | def sleep_milliseconds(mi_sec=50): 41 | 42 | time.sleep(mi_sec / 1000.0) 43 | 44 | 45 | @timing 46 | def test_sp_mi_sec(): 47 | sleep_milliseconds() 48 | 49 | 50 | def test_time_transform(): 51 | # time_stamp = int(time.time()) 52 | # s = '123' 53 | # print time_stamp 54 | # # 1509953402 55 | # print "%s_%s.pcm" % (s, time_stamp) 56 | # # 123_1509953402.pcm 57 | # s += str(time_stamp) 58 | # print s 59 | 60 | # print time.ctime(time_stamp) 61 | # Thu Jun 28 07:58:58 2018 62 | print time.strftime("%Y-%m-%d %H:%M:%S") 63 | # 2018-06-28 08:00:35 64 | 65 | # print time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(1509953402)) 66 | # # 2017-11-06 15:30:02 67 | # 68 | # time_stamp = int(time.time()) 69 | # print time_stamp 70 | # print divmod(time_stamp, 3600) 71 | 72 | 73 | def str_time(): 74 | import datetime 75 | str_time = '2018-02-01 0:0:0' 76 | d = datetime.datetime.strptime(str_time, "%Y-%m-%d %H:%M:%S") 77 | print d, d.strftime("%Y-%m-%d %H:%M:%S") 78 | # 2018-02-01 00:00:00, 2018-02-01 00:00:00 79 | 80 | for i in xrange(2): 81 | print d, d + datetime.timedelta(minutes=30) 82 | d = d + datetime.timedelta(minutes=30) 83 | 84 | # 2018-02-01 00:00:00 2018-02-01 00:30:00 85 | # 2018-02-01 00:30:00 2018-02-01 01:00:00 86 | 87 | 88 | if __name__ == '__main__': 89 | # import datetime 90 | # str_time = 
'2018-03-08T08:00:00.000' 91 | # d = datetime.datetime.strptime(str_time, "%Y-%m-%dT%H:%M:%S.%f") 92 | # print d 93 | # 2018-03-08 08:00:00 94 | 95 | import time 96 | str_time = '2018-03-08T08:00:00.000' 97 | # str_time.replace('T', ' ') 98 | 99 | # d = time.strftime("%Y-%m-%d %H:%M:%S.%f") 100 | 101 | import time 102 | 103 | str_time = '2018-03-08T08:00:00.000' 104 | d = time.strptime(str_time, "%Y-%m-%dT%H:%M:%S.%f") 105 | 106 | print d 107 | # time.struct_time(tm_year=2018, tm_mon=3, tm_mday=8, tm_hour=8, tm_min=0, tm_sec=0, tm_wday=3, tm_yday=67, tm_isdst=-1) 108 | 109 | print time.strftime("%Y-%m-%d %H:%M:%S", d) 110 | # 2018-03-08 08:00:00 111 | 112 | # str_time() 113 | # test_time_consume() 114 | # test_time() 115 | # test_sp_mi_sec() 116 | # test_time_transform() 117 | pass 118 | -------------------------------------------------------------------------------- /python_utils/http_basic/url_ope.py: -------------------------------------------------------------------------------- 1 | # _*_ coding:utf-8 _*_ 2 | 3 | 4 | import urllib 5 | import requests 6 | import urlparse 7 | 8 | 9 | def url_quote(): 10 | raw_url = 'http://bs2-audiorec.oss-cn-shanghai.aliyuncs.com/e98f81fb526061e92a26eda17955c219.pcm' 11 | url = urllib.quote(raw_url) 12 | print url 13 | # http%3A//bs2-audiorec.oss-cn-shanghai.aliyuncs.com/e98f81fb526061e92a26eda17955c219.pcm 14 | print urllib.unquote(url) 15 | # http://bs2-audiorec.oss-cn-shanghai.aliyuncs.com/e98f81fb526061e92a26eda17955c219.pcm 16 | print urllib.quote("河=&源") 17 | # %E6%B2%B3%E6%BA%90 18 | 19 | 20 | def req_with_para(): 21 | d_para = {"name": "xy", "age": 21} 22 | print requests.get('http://xy.com', params=d_para) 23 | 24 | # ordered name-value pairs 25 | d_sorted_para = [("age", 21), ("name", "xy")] 26 | print requests.get('http://xy.com', params=d_sorted_para) 27 | 28 | 29 | def url_encode_v1(): 30 | f = {'eventName': 'myEvent', 'eventDescription': '飞龙在天'} 31 | print urllib.urlencode(f) 32 | # 
eventName=myEvent&eventDescription=%E9%A3%9E%E9%BE%99%E5%9C%A8%E5%A4%A9 33 | 34 | 35 | def url_encode_v2(): 36 | d_para = {"name": "xy熊大", "age": 21} 37 | print '&'.join('%s=%s' % (k, v) for k, v in d_para.iteritems()) 38 | # age=21&name=xy熊大 39 | print '&'.join('%s=%s' % (k, urllib.quote(str(v))) for k, v in d_para.iteritems()) 40 | # age=21&name=xy%E7%86%8A%E5%A4%A7 41 | 42 | base_url = 'xy.com/' 43 | url = 'http://%s?%s' % (base_url, '&'.join('%s=%s' % (k, urllib.quote(str(v))) for k, v in d_para.iteritems())) 44 | print url 45 | # http://xy.com/?age=21&name=xy%E7%86%8A%E5%A4%A7 46 | 47 | print urllib.unquote(url) 48 | # http://xy.com/?age=21&name=xy熊大 49 | 50 | 51 | def post_request_tw(): 52 | mp4Url = 'https://bilinimg.bs2ul-ssl.yy.com/android2222.mp4' 53 | mp4Url = 'http://bilinaudiop.bs2dl.yy.com/odgud7b58056e79243f6bacb6580ce0506b1_36695268409460340_37155969.mp4?token=sgCAAFyARE0BAM2BQ1oAAAAAfTtDWgAAAAAMsEEkB0NPTlRFWFQJaQB7ImJ1Y2tldCI6ImJpbGluYXVkaW9wIiwiZmlsZW5hbWUiOiJvZGd1ZDdiNTgwNTZlNzkyNDNmNmJhY2I2NTgwY2UwNTA2YjFfMzY2OTUyNjg0MDk0NjAzNDBfMzcxNTU5NjkubXA0In0EQVVUSAMEAAMAAADI_C7ba_qUVQLIXkAWf7r_sF_FnQ' 54 | 55 | d_para = {"mp4Url": mp4Url, 56 | "secretKey": "XY-bl-audio-rec-text-ret", 57 | "serial": "17598411"} 58 | 59 | print urllib.urlencode(d_para) 60 | 61 | # rsp = requests.get('http://172.27.49.16:8887/bilin/audiorec/', params=d_para) 62 | rsp = requests.get('http://61.147.186.82:9997/bilin/audiorec/', params=d_para) 63 | print rsp.url # 输出请求的 url 64 | print rsp.content 65 | 66 | s = "sign=052c177ab75dfd53ab6b1cdc25569ef1&text=%E9%83%BD%E6%95%8F%E6%B3%95%E8%BD%AE%E5%8A%9F%E7%BB%83%E4%B9%A0%E8%80%85%E8%B7%B3%E6%A5%BC%E5%89%B2%E8%85%95%E6%8A%95%E6%B2%B3%EF%BC%8C&ts=1513238084&code=0&serial=17598411" 67 | print urllib.unquote(s) 68 | 69 | 70 | def url_parse(): 71 | """get url query parameters""" 72 | url = 'http://foo.appspot.com/abc?def=ghi' 73 | parsed = urlparse.urlparse(url) 74 | 75 | print urlparse.parse_qs(parsed.query)['def'] 76 | # ['ghi'] 77 | 
78 | 79 | if __name__ == '__main__': 80 | url_parse() 81 | # post_request_tw() 82 | # url_quote() 83 | # url_encode_v1() 84 | # url_encode_v2() 85 | pass 86 | -------------------------------------------------------------------------------- /python_utils/numpy_operate/random_arr.py: -------------------------------------------------------------------------------- 1 | # _*_ coding:utf-8 _*_ 2 | 3 | """ 4 | This module is some example about random array 5 | """ 6 | import numpy as np 7 | 8 | 9 | def generate_random_2d_arr(col, row): 10 | """ 11 | generate random 2d array from 0~col*row 12 | :param col: the num of column 13 | :param row: the num of row 14 | :return: like generate_random_2d_arr(4, 3) 15 | [[ 7 10 5] 16 | [ 3 4 2] 17 | [ 8 11 6] 18 | [ 9 1 0]] 19 | """ 20 | return np.random.permutation(col * row).reshape(row, col) 21 | 22 | 23 | def random_arr(): 24 | a = np.random.random(size=(2, 4)) 25 | print a 26 | # [[ 0.13652737 0.32546344 0.58527282 0.0899639 ] 27 | # [ 0.21190661 0.05351992 0.42603268 0.17524264]] 28 | 29 | 30 | def random_int_arr(): 31 | print np.random.random_integers(5) 32 | # like 3 33 | arr = np.random.random_integers(12, size=(3, 4)) 34 | print arr 35 | # [[ 2 9 7 6] 36 | # [ 9 1 9 1] 37 | # [ 8 6 11 5]] 38 | d1 = np.random.random_integers(1, 6, 10) 39 | print d1 40 | # [6 4 5 2 4 1 1 5 6 2] 41 | arr_f = 0.5 * (np.random.random_integers(12, size=(8, )) - 1) 42 | print arr_f 43 | # [ 5.5 2.5 2.5 1. 4. 4.5 5.5 3. 
] 44 | print np.random.randint(12, size=(3, 4)) 45 | # [[0 7 1 8] 46 | # [7 1 1 2] 47 | # [8 4 9 3]] 48 | 49 | 50 | def sample_rows(): 51 | arr1 = np.random.randint(5, size=(5, 3)) 52 | print arr1 53 | # [[0 0 2] 54 | # [1 2 0] 55 | # [0 0 4] 56 | # [3 3 4] 57 | # [4 3 2]] 58 | 59 | print arr1[[1, 2]] 60 | # [[1 2 0] 61 | # [0 0 4]] 62 | 63 | idx = np.random.randint(5, size=2) 64 | print idx 65 | # [1 2] 66 | print arr1[idx, :] 67 | # [[1 2 0] 68 | # [0 0 4]] 69 | print arr1[idx, ] 70 | # [[1 2 0] 71 | # [0 0 4]] 72 | 73 | print arr1[np.random.randint(arr1.shape[0], size=2), :] 74 | # [[0 0 2] 75 | # [4 3 2]] 76 | 77 | 78 | def choice_arr(): 79 | """ 80 | numpy.random.choice(a, size=None, replace=True, p=None) 81 | Generates a random sample from a given 1-D array 82 | a : 1-D array-like or int 83 | If an ndarray, a random sample is generated from its elements. 84 | If an int, the random sample is generated as if a were np.arange(a) 85 | """ 86 | arr1 = np.arange(5) 87 | print arr1 88 | # [0 1 2 3 4] 89 | print np.random.choice(arr1, 2) 90 | # [4 0] 91 | print np.random.choice(5, 2) 92 | # [3 0] 93 | 94 | 95 | def ran_seed(): 96 | sd = 3 97 | np.random.seed(sd) 98 | print np.random.rand(4) 99 | # [ 0.5507979 0.70814782 0.29090474 0.51082761] 100 | print np.random.rand(4) 101 | # [ 0.89294695 0.89629309 0.12558531 0.20724288] 102 | 103 | np.random.seed(sd) 104 | print np.random.rand(4) 105 | # [ 0.5507979 0.70814782 0.29090474 0.51082761] 106 | 107 | np.random.seed(sd) 108 | arr = np.random.randint(5, size=(2, 3)) 109 | print arr 110 | # [[2 0 1] 111 | # [3 0 0]] 112 | arr = np.random.randint(5, size=(2, 3)) 113 | print arr 114 | # [[0 3 2] 115 | # [3 1 1]] 116 | np.random.seed(sd) 117 | arr = np.random.randint(5, size=(2, 3)) 118 | print arr 119 | # [[2 0 1] 120 | # [3 0 0]] 121 | 122 | if __name__ == '__main__': 123 | ran_seed() 124 | # choice_arr() 125 | # print generate_random_2d_arr(4, 3) 126 | # random_arr() 127 | # random_int_arr() 128 | 129 | pass 130 | 
-------------------------------------------------------------------------------- /python_utils/matplot/basic.py: -------------------------------------------------------------------------------- 1 | # _*_ coding:utf-8 _*_ 2 | """ 3 | This file is about `matplotlib` 4 | Mainly cited from http://matplotlib.org/users/pyplot_tutorial.html 5 | """ 6 | 7 | import matplotlib.pyplot as plt 8 | import numpy as np 9 | import random 10 | 11 | 12 | def basic_linear(): 13 | y_lst = [1, 2, 3, 4] 14 | # y_lst = random.sample(range(80000), 500) 15 | plt.plot(y_lst) 16 | plt.ylabel('y axis value') 17 | plt.show() 18 | 19 | 20 | def plot_x1_y4(): 21 | """ 22 | plot a line x = 1 23 | :return: 24 | """ 25 | plt.plot([0, 0], [0, 4], color='red', linewidth=3.0) 26 | plt.axis([-1, 1, -4, 4]) 27 | plt.show() 28 | 29 | 30 | def basic_curve(): 31 | x = np.linspace(0, 2, 11) 32 | print x 33 | y = x ** 3 - 5 * x ** 2 + 6 * x + 1 34 | print y 35 | # plt.plot(x, y, 'r-') 36 | # plt.plot(x, y) 37 | lines = plt.plot([1, 2, 3, 4], [1, 4, 9, 16]) 38 | plt.setp(lines, color='r') 39 | plt.show() 40 | # plt.axis([0, 100, 0, 100]) 41 | 42 | 43 | def multi_curve(): 44 | t = np.arange(0., 5., 0.2) 45 | print t 46 | # plt.plot(t, t, 'r-', t, t**2, 'bs', t, t**3, 'g^') 47 | # plt.show() 48 | 49 | 50 | def f(t): 51 | return np.exp(-t) * np.cos(2 * np.pi * t) 52 | 53 | 54 | def multi_figure(): 55 | plt.figure(1) # the first figure 56 | plt.subplot(211) # the first subplot in the first figure 57 | plt.plot([1, 2, 3]) 58 | plt.subplot(212) # the second subplot in the first figure 59 | plt.plot([4, 5, 6, 7, 11]) 60 | 61 | plt.figure(2) # a second figure 62 | plt.plot([4, 5, 6]) # creates a subplot(111) by default 63 | 64 | plt.figure(1) # figure 1 current; subplot(212) still current 65 | plt.subplot(211) # make subplot(211) in figure1 current 66 | plt.title('Easy as 1, 2, 3') # subplot 211 title 67 | 68 | plt.show() 69 | 70 | 71 | def multi_figure_two(): 72 | t1 = np.arange(0., 5, 0.1) 73 | t2 = np.arange(0., 5, 
0.02) 74 | 75 | plt.figure(1) 76 | plt.subplot(211) 77 | plt.plot(t1, f(t1), 'k') 78 | 79 | plt.subplot(212) 80 | plt.plot(t2, np.cos(2 * np.pi * t2), 'bo') 81 | 82 | plt.show() 83 | 84 | 85 | def histogram(): 86 | x_mul = [np.random.randn(n) for n in [1000, 1000, 1000]] 87 | print x_mul 88 | bin = 10 89 | plt.hist(x_mul, bin) 90 | plt.show() 91 | 92 | 93 | def histogram_two(): 94 | x_mul = [random.sample(range(0, 100), n) for n in [60, 50, 70]] 95 | print x_mul[0] 96 | print x_mul[1] 97 | print x_mul[2] 98 | bin = 10 99 | plt.hist(x_mul, bin) 100 | plt.show() 101 | 102 | 103 | def plot_2d(): 104 | x = [1, 2, 3, 4, 5, 6, 7] 105 | y = [2.6, 3.6, 8.3, 56, 12.7, 8.9, 5.3] 106 | plt.plot(x, y) # plot line 107 | # plt.scatter(x, y) # plot scatter 108 | plt.show() 109 | 110 | 111 | def plot_orthogonal(): 112 | arr = np.array([[-0.85389096, -0.52045195], [0.52045195, -0.85389096]]) 113 | # arr = np.array([[1, -1], [1, 1]]) 114 | v1_x, v2_x = [arr[:, 0][0], 0], [arr[:, 1][0], 0] 115 | v1_y, v2_y = [arr[:, 0][1], 0], [arr[:, 1][1], 0] 116 | plt.plot(v1_x, v1_y) 117 | plt.plot(v2_x, v2_y) 118 | # plt.axis([-1, 1, -1, 1]) 119 | # set the below bound, or the line won't seem orthogonal 120 | # plt.axis([-0.85389096, 0.52045195, -0.85389096, 0.52045195]) 121 | plt.show() 122 | 123 | 124 | if __name__ == '__main__': 125 | plot_orthogonal() 126 | # plot_2d() 127 | # basic_linear() 128 | # basic_curve() 129 | # multi_curve() 130 | # multi_figure() 131 | # multi_figure_two() 132 | # histogram() 133 | # histogram_two() 134 | pass 135 | -------------------------------------------------------------------------------- /python_utils/machine_learn/neural_network_keras/nn_keras_digits.py: -------------------------------------------------------------------------------- 1 | # _*_ coding:utf-8 _*_ 2 | """ 3 | This file is mainly use keras to recognize digits,with 4 | Multi-Layer perceptrons or neural network 5 | """ 6 | from keras.datasets import mnist 7 | import gzip 8 | import 
matplotlib.pyplot as plt 9 | import sys 10 | # from six.moves import cPickle 11 | import cPickle 12 | import numpy as np 13 | from keras.models import Sequential 14 | from keras.layers import Dense, Activation 15 | from keras.layers import Dropout 16 | from keras.utils import np_utils 17 | 18 | 19 | def load_data(): 20 | """ 21 | this function is used to load data 22 | :return: 23 | """ 24 | file = '../dataset/mnist/mnist.pkl.gz' 25 | f = gzip.open(file, 'rb') 26 | if sys.version_info < (3,): 27 | data = cPickle.load(f) 28 | else: 29 | data = cPickle.load(f, encoding='bytes') 30 | f.close() 31 | return data 32 | 33 | 34 | def show_image(): 35 | """ 36 | this function is for a test to show, server image 37 | :return: 38 | """ 39 | (X_train, y_train), (X_validation, y_validation), (X_test, y_test) = load_data() 40 | # (X_train, y_train), (X_test, y_test) = mnist.load_data() 41 | # plot 4 images as gray scale 42 | plt.subplot(221) 43 | plt.imshow(X_train[0], cmap=plt.get_cmap('gray')) 44 | plt.subplot(222) 45 | plt.imshow(X_train[1], cmap=plt.get_cmap('gray')) 46 | plt.subplot(223) 47 | plt.imshow(X_train[2], cmap=plt.get_cmap('gray')) 48 | plt.subplot(224) 49 | plt.imshow(X_train[3], cmap=plt.get_cmap('gray')) 50 | # show the plot 51 | plt.show() 52 | 53 | 54 | def generate_data(): 55 | (X_train, y_train), (X_test, y_test) = load_data() 56 | 57 | # flatten 28*28 images to a 784 vector for each image 58 | print X_train.shape[1], X_train.shape[2], X_train.shape 59 | # X_train.shape -> (60000L, 28L, 28L) 60 | num_pixels = X_train.shape[1] * X_train.shape[2] 61 | X_train = X_train.reshape(X_train.shape[0], num_pixels).astype('float32') 62 | X_test = X_test.reshape(X_test.shape[0], num_pixels).astype('float32') 63 | 64 | # normalize inputs from 0-255 to 0-1 65 | X_train = X_train / 255 66 | X_test = X_test / 255 67 | 68 | y_train = np_utils.to_categorical(y_train) 69 | y_test = np_utils.to_categorical(y_test) 70 | # print y_train.shape, y_test.shape 71 | # y_train.shape 
-> (60000L, 10L), y_test.shape -> (10000L, 10L) 72 | num_classes = y_test.shape[1] 73 | 74 | return X_train, y_train, X_test, y_test 75 | 76 | 77 | def baseline_model(): 78 | """ 79 | define baseline model 80 | :return: 81 | """ 82 | # create model 83 | model = Sequential() 84 | 85 | num_pixels = 784 86 | # model.add(Dense(num_pixels, input_dim=num_pixels, init='normal', activation='relu')) 87 | model.add(Dense(num_pixels, input_dim=num_pixels, activation='relu')) 88 | num_classes = 10 89 | # model.add(Dense(num_classes, init='normal', activation='softmax')) 90 | model.add(Dense(num_classes, activation='softmax')) 91 | # Compile model 92 | model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']) 93 | return model 94 | 95 | 96 | def train_and_evaluate(): 97 | X_train, y_train, X_test, y_test = generate_data() 98 | model = baseline_model() 99 | model.fit(X_train, y_train, validation_data=(X_test, y_test), nb_epoch=10, batch_size=200, verbose=2) 100 | # Final evaluation of the model 101 | scores = model.evaluate(X_test, y_test, verbose=0) 102 | print("Baseline Error: %.2f%%" % (100-scores[1]*100)) 103 | 104 | 105 | if __name__ == '__main__': 106 | # load_data() 107 | generate_data() 108 | # train_and_evaluate() 109 | pass 110 | -------------------------------------------------------------------------------- /python_utils/sk_sc_pd_operator/pd_pivot.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | import numpy as np 4 | import pandas as pd 5 | import seaborn as sns 6 | import matplotlib.pyplot as plt 7 | sns.set() # use Seaborn styles 8 | 9 | 10 | def titanic_1(): 11 | titanic = sns.load_dataset('titanic') 12 | print titanic.head() 13 | # survived pclass sex age ...... 
14 | # 0 0 male 22 15 | # 1 1 1 female 38.0 16 | # 2 1 3 female 26.0 17 | # 3 1 1 female 35.0 18 | # 4 0 3 male 35.0 19 | 20 | print titanic.groupby('sex')[['survived']].mean() 21 | # survived 22 | # sex 23 | # female 0.742038 24 | # male 0.188908 25 | 26 | print titanic.groupby(['sex', 'class'])['survived'].aggregate('mean').unstack() 27 | # class First Second Third 28 | # sex 29 | # female 0.968085 0.921053 0.500000 30 | # male 0.368852 0.157407 0.135447 31 | 32 | print titanic.pivot_table('survived', index='sex', columns='class') 33 | # class First Second Third 34 | # sex 35 | # female 0.968085 0.921053 0.500000 36 | # male 0.368852 0.157407 0.135447 37 | 38 | age = pd.cut(titanic['age'], [0, 18, 80]) 39 | print titanic.pivot_table('survived', ['sex', age], 'class') 40 | # class First Second Third 41 | # sex age 42 | # female (0, 18] 0.909091 1.000000 0.511628 43 | # (18, 80] 0.972973 0.900000 0.423729 44 | # male (0, 18] 0.800000 0.600000 0.215686 45 | # (18, 80] 0.375000 0.071429 0.133663 46 | 47 | print titanic.pivot_table(index='sex', columns='class', 48 | aggfunc={'survived': sum, 'fare': 'mean'}) 49 | 50 | print titanic.pivot_table('survived', index='sex', columns='class', margins=True) 51 | # class First Second Third All 52 | # sex 53 | # female 0.968085 0.921053 0.500000 0.742038 54 | # male 0.368852 0.157407 0.135447 0.188908 55 | # All 0.629630 0.472826 0.242363 0.383838 56 | 57 | 58 | def births_demo(): 59 | path = 'E:/python_code/births.csv' 60 | births = pd.read_csv(path) 61 | print births.head() 62 | # year month day gender births 63 | # 0 1969 1 1 F 4046 64 | # 1 1969 1 1 M 4440 65 | # 2 1969 1 2 F 4454 66 | # 3 1969 1 2 M 4548 67 | # 4 1969 1 3 F 4548 68 | 69 | births['decade'] = 10 * (births['year'] // 10) 70 | print births.pivot_table('births', index='decade', columns='gender', aggfunc='sum') 71 | # gender F M 72 | # decade 73 | # 1960 1753634 1846572 74 | # 1970 16263075 17121550 75 | # 1980 18310351 19243452 76 | # 1990 19479454 20420553 77 | 
# 2000 18229309 19106428 78 | 79 | births.pivot_table('births', index='year', columns='gender', aggfunc='sum').plot() 80 | 81 | plt.ylabel('total births per year') 82 | plt.show() 83 | 84 | # create a datetime index from the year, month, day 85 | births.index = pd.to_datetime(10000 * births.year + 86 | 100 * births.month + 87 | births.day, format='%Y%m%d') 88 | 89 | births['dayofweek'] = births.index.dayofweek 90 | births.pivot_table('births', index='dayofweek', 91 | columns='decade', aggfunc='mean').plot() 92 | 93 | plt.gca().set_xticklabels(['Mon', 'Tues', 'Wed', 'Thurs', 'Fri', 'Sat', 'Sun']) 94 | 95 | plt.ylabel('mean births by day') 96 | plt.show() 97 | 98 | if __name__ == '__main__': 99 | births_demo() 100 | # titanic_1() 101 | pass 102 | -------------------------------------------------------------------------------- /python_utils/thread_process/thread_queue.py: -------------------------------------------------------------------------------- 1 | # _*_ coding:utf-8 _*_ 2 | """ 3 | 4 | """ 5 | import Queue 6 | import threading 7 | import logging 8 | import random 9 | import time 10 | from basic_thread import join_all_others_thread 11 | logging.basicConfig(level=logging.DEBUG, 12 | format='%(levelname)s %(asctime)s %(threadName)s %(message)s', 13 | datefmt='%Y-%m-%d %I:%M:%S') 14 | lst_que = Queue.Queue() 15 | 16 | 17 | def produce_item(): 18 | return threading.currentThread().name, random.randint(0, 10) 19 | pass 20 | 21 | 22 | def producer(num): 23 | for i in xrange(num): 24 | item = produce_item() 25 | lst_que.put(item) 26 | logging.info('produce item : ' + str(item)) 27 | time.sleep(0.5) 28 | 29 | 30 | def consume(): 31 | while True: 32 | try: 33 | # non-block if lst_queue is empty then, it will raise Empty error 34 | item = lst_que.get(False) 35 | if item: 36 | logging.debug('consume item: ' + str(item)) 37 | time.sleep(0.5) 38 | except Queue.Empty, e: 39 | # if lst_que is empty then do the following code snippet 40 | logging.warn('queue empty ' + str(e) + 
def create_mul_thread(thread_num, prefix_name, target_name):
    """
    A template of creating and starting n threads that all run the
    same task.
    :param thread_num: the num of threads to start
    :param prefix_name: thread-name prefix; the special prefix
                        'consume--' marks a consumer target that
                        takes no arguments
    :param target_name: the callable each thread runs
    :return:
    """
    for idx in xrange(thread_num):
        # draw the amount every round so the RNG sequence matches
        # regardless of which branch is taken
        amount = random.randint(10, 100)
        kwargs = {'name': prefix_name + str(idx), 'target': target_name}
        if prefix_name != 'consume--':
            # producers get a random item count to produce
            kwargs['args'] = (amount, )
        threading.Thread(**kwargs).start()
def common_create():
    """
    Common ways of creating numpy arrays: from nested lists, and with
    an explicit dtype.  Output comments below show the Python 2 repr.
    :return: none
    """
    # FIX: single-argument print statements converted to print() calls,
    # which behave identically on Python 2 and also run on Python 3.
    arr1 = np.array([1, 2])
    print(arr1)
    # [1 2]
    print(arr1.shape)
    # (2L,)
    arr2 = np.array([[1, 2], [3.1, 4.]])
    print(arr2)
    # [[ 1. 2. ]
    #  [ 3.1 4. ]]
    print(arr2.shape)
    # (2L, 2L)
    arr3 = np.array([[1, 2], [3, 4]], dtype=complex)
    print(arr3)
    # [[ 1.+0.j 2.+0.j]
    #  [ 3.+0.j 4.+0.j]]
def test_ndim():
    """
    Demonstrate ndarray.ndim — the number of array dimensions —
    for 1-D, 2-D and 3-D arrays.
    :return: none
    """
    # FIX: single-argument print statements converted to print() calls,
    # which behave identically on Python 2 and also run on Python 3.
    x = np.array([1, 2, 3])
    print(x.ndim)
    # 1

    y = np.array([[1, 2, 3], [4, 5, 6]])
    print(y.ndim)
    # 2

    z = np.arange(12).reshape((2, 2, 3))
    print(z.ndim)
    # 3
def byteify(input):
    """
    Recursively convert the unicode strings inside a json.load() result
    into utf-8 encoded str objects (Python 2 only).
    This function coming from stack overflow.
    :param input: {u'first_name': u'Guido', u'last_name': u'jack'}
    :return: {'first_name': 'Guido', 'last_name': 'jack'}
    """
    # the three container/string cases are mutually exclusive, so the
    # branch order does not matter
    if isinstance(input, unicode):
        return input.encode('utf-8')
    if isinstance(input, list):
        return [byteify(element) for element in input]
    if isinstance(input, dict):
        return dict((byteify(key), byteify(value))
                    for key, value in input.iteritems())
    # ints, floats, bools, None pass through unchanged
    return input
sort_keys=True, ensure_ascii=False)) 53 | 54 | if __name__ == '__main__': 55 | # generate_keyword_jsonfile() 56 | # print get_json_from_file('../config/search_keywords.json') 57 | js = {"aaData":[ {"id":21,"keyword":"\u8D85\u7BA1","keywordType":0}, {"id":43,"keyword":"\u516C\u5B89","keywordType":0}, {"id":44,"keyword":"\u519B\u88C5","keywordType":1}, {"id":45,"keyword":"\u66B4\u529B","keywordType":1}, {"id":46,"keyword":"\u519B\u670D","keywordType":2}, {"id":47,"keyword":"\u9732\u4E73","keywordType":2},], "data":[{"$ref":"$.aaData[0]"},{"$ref":"$.aaData[1]"}, {"$ref":"$.aaData[2]"},{"$ref":"$.aaData[3]"}, {"$ref":"$.aaData[2]"},{"$ref":"$.aaData[3]"}], "error":False,"iTotalDisplayRecords":6,"iTotalRecords":6,"recordsFiltered":6,"recordsTotal":6,"sEcho":"1","success":True} 58 | js = {"aaData":[{"id":195261,"keywordCore":"直播","keywordDepartment":"YY","keywordWarn":"裸聊","newsDate":"2017-02-28 20:14:01","newsTitle":"美女直播","updateKeywordDate":"2017-02-28 20:14:01","url":"http://www.junjiewang.com\/44756.html","webSrc":"junjie"}, {"id":195258,"keywordCore":"直播","keywordDepartment":"YY","keywordWarn":"裸聊","newsDate":"2017-02-28 20:13:58","newsTitle":"美女直播","updateKeywordDate":"2017-02-28 20:13:58","url":"http:www.junjiewang.com/45345.html","webSrc":"junjie"},], "contentList":[{"contentNum":67,"dateDay":"2017-01-08"},{"contentNum":20,"dateDay":"2017-01-09"}], "data":[{"$ref":"$.aaData[0]"},{"$ref":"$.aaData[1]"}], "error":False,"iTotalDisplayRecords":2,"iTotalRecords":2,"recordsFiltered":2,"recordsTotal":2,"sEcho":"3","success":True, "warnList":[{"contentNum":28,"dateDay":"2017-01-08"},{"contentNum":8,"dateDay":"2017-01-09"}]} 59 | js = {"aaData":[{"id":195261,"keywordCore":"\u76F4\u64AD","keywordDepartment":"YY","keywordWarn":"\u88F8\u804A","newsDate":"2017-02-28 20:14:01","newsTitle":"\u76F4\u64AD\u65B0\u89C4\u4ECA\u8D77\u5B9E\u65BD\uFF0C\u4F60\u5E94\u8BE5\u77E5\u9053\u7684\u516D\u4E2A\u95EE\u9898","updateKeywordDate":"2017-02-28 
class Perception(object):
    """A linear perceptron classifier trained with (mini-batch) SGD.

    Samples are rows [x1, .., xn, label] where label is +1 or -1;
    prediction is sign(w . x + b).
    """

    def __init__(self, var_num):
        # weights start at all-ones (a random init is left commented out)
        # self.w = np.random.randn(1, var_num)
        self.w = np.ones(var_num)
        self.b = 1
        # number of input features per sample
        self.var_num = var_num
        # early-stopping target: stop SGD once the training error
        # rate drops to this level or below
        self.min_error_rate = 0.02

    def train(self, train_data, eta):
        """
        training model: one pass of the perceptron update rule over the
        given samples; only misclassified samples change w and b.
        :param train_data: array like [[1, 2, 0], [1.1, 0.8, 1]] — each
            row is features followed by the label in the last column
        :param eta: learning rate:
        :return none:
        """
        for item in train_data:
            # margin = label * (w . x + b); <= 0 means misclassified
            output = (np.dot(self.w, item[0:-1]) + self.b)*item[-1]
            if output <= 0:
                self.w += eta * item[-1] * item[0:-1]
                self.b += eta * item[-1]

    def sgd(self, train_data, epoch, eta, batch_size):
        """
        Training perception model by stochastic gradient descent:
        shuffle each epoch, split into mini-batches, update on each
        batch, and stop early when the error rate is low enough.
        :param train_data: 2D array like [[1.1, 2.3, -1]] the last
        item -1 train_date[0][-1] means label
        :param epoch:
        :param eta:learning rate
        :param batch_size: number of samples per mini-batch
        :return:none
        """
        for i in xrange(epoch):
            # in-place shuffle so every epoch sees a different batch split
            np.random.shuffle(train_data)
            batch_lst = [train_data[k:k+batch_size] for k in xrange(0, len(train_data), batch_size)]
            for mini_batch in batch_lst:
                self.train(mini_batch, eta)

            current_error_rate = self.get_error_rate(train_data)
            print 'epoch {0} current_error_rate: {1}'.format(i+1, current_error_rate)
            print self.get_current_para()
            if current_error_rate <= self.min_error_rate:
                break

    def get_error_rate(self, validate_data):
        """Return the fraction of rows in validate_data that the current
        (w, b) misclassifies (labels compared against sign(w.x + b),
        with 0 treated as +1)."""
        all_len = validate_data.shape[0]
        error_len = 0
        for item in validate_data:
            output = np.dot(self.w, item[0:-1]) + self.b
            output = 1 if output >= 0 else -1
            error = True if output != item[-1] else False
            if error:
                error_len += 1

        return float(error_len) / all_len

    def get_current_para(self):
        # current (weights, bias) pair, mainly for progress logging
        return self.w, self.b

    def get_weight(self):
        return self.w

    def get_bias(self):
        return self.b
def NMS(bboxes, threshold=0.5, model='union'):
    """
    Non max suppression
    :param bboxes: tensor bounding boxes and scores sized [N, 5],
        each row [x1, y1, x2, y2, score]
    :param threshold: float overlap threshold; boxes whose overlap with
        an already-kept box exceeds it are suppressed
    :param model: str 'union' (IoU: intersection over union) or 'min'
        (intersection over the smaller box's area)
    :return:
        bboxes after nms
        picked indices
    """
    x1 = bboxes[:, 0]
    y1 = bboxes[:, 1]
    x2 = bboxes[:, 2]
    y2 = bboxes[:, 3]
    scores = bboxes[:, 4]

    # all the box areas (+1: inclusive pixel coordinates)
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    # descending order scores
    orders = np.argsort(-scores)

    # store the saving indices of the bounding box
    keep_idx = []

    while len(orders) > 0:
        idx = orders[0]
        keep_idx.append(idx)

        # tensor operator, compute all the intersect with the max score area
        xx1 = np.maximum(x1[idx], x1[orders[1:]])
        yy1 = np.maximum(y1[idx], y1[orders[1:]])
        xx2 = np.minimum(x2[idx], x2[orders[1:]])
        yy2 = np.minimum(y2[idx], y2[orders[1:]])

        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        # get all the intersect area, note this is tensor operator
        inter = w * h

        # FIX: the documented 'model' parameter used to be ignored;
        # 'min' normalizes by the smaller area, default 'union' keeps
        # the original IoU behaviour unchanged
        if model == 'min':
            overlap_ratio = inter / np.minimum(areas[idx], areas[orders[1:]])
        else:
            overlap_ratio = inter / (areas[idx] + areas[orders[1:]] - inter)

        inds = np.where(overlap_ratio <= threshold)[0]
        orders = orders[inds + 1]  # add 1, because the first is the keep index

    return bboxes[keep_idx], keep_idx
def compute_iou(box1, box2):
    """
    Compute iou rate of two box.
    :param box1: lst [x1, y1, x2, y2, score]
    :param box2: like box1
    :return: float iou rate
    """
    # corners of the intersection rectangle (inclusive pixel coords)
    ix1, iy1 = max(box1[0], box2[0]), max(box1[1], box2[1])
    ix2, iy2 = min(box1[2], box2[2]), min(box1[3], box2[3])

    # clamp to zero when the boxes do not overlap at all
    inter_area = max(ix2 - ix1 + 1, 0.0) * max(iy2 - iy1 + 1, 0.0)

    size1 = (box1[2] - box1[0] + 1) * (box1[3] - box1[1] + 1)
    size2 = (box2[2] - box2[0] + 1) * (box2[3] - box2[1] + 1)

    # intersection over union
    return inter_area / float(size1 + size2 - inter_area)
def subplot_demo3():
    # Two subplots, unpack the axes array immediately:
    # side-by-side panels that share one y axis
    xs = np.linspace(0, 2 * np.pi, 300)
    ys = np.sin(xs ** 2)

    fig, (left_ax, right_ax) = plt.subplots(1, 2, sharey=True)
    # line plot on the left, scatter of the same data on the right
    left_ax.plot(xs, ys)
    left_ax.set_title('Sharing Y axis')
    right_ax.scatter(xs, ys)

    plt.show()
def plot_sigmoid():
    """Plot the logistic sigmoid 1 / (1 + e^-x) over [-10, 10)."""
    def sigmoid(x):
        # FIX: the original used np.exp(x), which plots the mirrored
        # curve 1 - sigmoid(x); the logistic sigmoid needs exp(-x)
        return 1.0 / (1 + np.exp(-x))

    x = np.arange(-10., 10., 0.1)
    plt.plot(x, sigmoid(x))
    plt.show()
def miss_series():
    """
    Demonstrate detecting and dropping missing values in a Series;
    both np.nan and None count as missing.
    :return: none
    """
    # FIX: single-argument print statements converted to print() calls,
    # which behave identically on Python 2 and also run on Python 3.
    data = pd.Series([1, np.nan, 'hello', None])

    # boolean mask flagging the missing entries
    print(data.isnull())
    # 0 False
    # 1 True
    # 2 False
    # 3 True
    # dtype: bool

    # keep non-missing entries via a boolean mask ...
    print(data[data.notnull()])
    # 0 1
    # 2 hello
    # dtype: object

    # ... or equivalently with dropna()
    print(data.dropna())
    # 0 1
    # 2 hello
    # dtype: object
def drop_specify():
    """
    Demonstrate dropping specific rows/columns from a DataFrame:
    by index label, by column name, by boolean filter, and by position.
    :return: none
    """
    # FIX: single-argument print statements converted to print() calls,
    # which behave identically on Python 2 and also run on Python 3.
    data = {'name': ['Jason', 'Molly', 'Tina', 'Jake', 'Amy'],
            'year': [2012, 2012, 2013, 2014, 2014],
            'reports': [4, 24, 31, 2, 3]}

    df = pd.DataFrame(data,
                      index=['Cochice', 'Pima', 'Santa Cruz', 'Maricopa', 'Yuma'])

    print(df)
    # name reports year
    # Cochice Jason 4 2012
    # Pima Molly 24 2012
    # Santa Cruz Tina 31 2013
    # Maricopa Jake 2 2014
    # Yuma Amy 3 2014

    # drop rows by index label
    print(df.drop(['Cochice', 'Pima']))

    # drop a column by name
    print(df.drop('reports', axis=1))

    # drop rows via a boolean filter on a column
    print(df[df.name != 'Tina'])

    # drop rows by integer position
    print(df.drop(df.index[2]))

    print(df.drop(df.index[[2, 3]]))
| y = np.array([0, 0, 1, 1, 0, 0]) 17 | 18 | return x, y 19 | 20 | 21 | def sk_feature_ref(): 22 | # load the iris datasets 23 | dataset = datasets.load_iris() 24 | # create a base classifier used to evaluate a subset of attributes 25 | model_lr = LogisticRegression() 26 | # create the RFE model and select 3 attributes 27 | rfe = RFE(model_lr, 3) 28 | rfe = rfe.fit(dataset.data, dataset.target) 29 | # summarize the selection of the attributes 30 | print rfe.support_ 31 | # [False True True True] 32 | print rfe.ranking_ 33 | # [2 1 1 1] 34 | print sorted(zip(map(lambda x: round(x, 4), rfe.ranking_), dataset.feature_names)) 35 | # [(1.0, 'petal length (cm)'), (1.0, 'petal width (cm)'), (1.0, 'sepal width (cm)'), (2.0, 'sepal length (cm)')] 36 | 37 | 38 | def feature_importance(): 39 | from sklearn.ensemble import ExtraTreesClassifier 40 | 41 | dataset = datasets.load_iris() 42 | model = ExtraTreesClassifier() 43 | model.fit(dataset.data, dataset.target) 44 | print zip(dataset.feature_names, map(lambda x: round(x, 2), model.feature_importances_)) 45 | # [('sepal length (cm)', 0.13), ('sepal width (cm)', 0.07), ('petal length (cm)', 0.35), ('petal width (cm)', 0.45)] 46 | 47 | 48 | def sk_feature_ref_v2(): 49 | X, Y = get_dummy_data() 50 | names = ['f1', 'f2', 'f3'] 51 | 52 | model_lr = LogisticRegression() 53 | 54 | rfe = RFE(model_lr, 2) 55 | rfe = rfe.fit(X, Y) 56 | 57 | print rfe.support_ 58 | print rfe.ranking_ 59 | print sorted(zip(map(lambda x: round(x, 4), rfe.ranking_), names)) 60 | 61 | 62 | def test_standard_scaler(): 63 | from sklearn.preprocessing import StandardScaler 64 | arr = [-2, -1, 0, 1, 2] 65 | print StandardScaler().fit_transform(arr) 66 | # [-1.414-0.707 0 0.707 1.414] 67 | 68 | 69 | def test_min_max_scaler(): 70 | from sklearn.preprocessing import MinMaxScaler 71 | arr = np.array([0, 1, 2, 3, 4]) 72 | print MinMaxScaler().fit_transform(arr) 73 | # [ 0. 0.25 0.5 0.75 1. 
def test_pearsonr():
    """
    Pearson correlation demos: two shifted integer ranges (perfectly
    linearly correlated, r == 1) and a sequence against itself.
    :return: none
    """
    from scipy.stats import pearsonr
    arr1 = np.arange(0, 12)
    arr2 = np.arange(5, 17)
    print(pearsonr(arr1, arr2))

    # FIX: np.arange(-1, 1, 30) treats 30 as the STEP and yields the
    # single element [-1.], which is degenerate for a correlation;
    # linspace gives the intended 30 samples in [-1, 1].
    x = np.linspace(-1, 1, 30)
    y = x
    print(pearsonr(x, y))