├── python_utils ├── matplot │ ├── __init__.py │ ├── plot_accuracy_loss.py │ ├── basic.py │ └── plot_many.py ├── utils │ ├── __init__.py │ └── JsonUtil.py ├── DbService │ ├── __init__.py │ ├── mysql_db │ │ ├── __init__.py │ │ ├── DbBase.py │ │ └── DbSubService.py │ ├── redis_db │ │ └── __init__.py │ ├── sqlalchemy │ │ ├── __init__.py │ │ ├── orm.py │ │ └── basic.py │ └── config │ │ └── mysql_config.json ├── al_lt_common │ ├── __init__.py │ ├── al_str.py │ └── al_cv.py ├── distributed │ ├── __init__.py │ └── zookeeper_demo │ │ ├── __init__.py │ │ ├── zk_failover_monitor.py │ │ ├── zk_watch.py │ │ ├── zk_failover_worker.py │ │ ├── zk_lock_demo.py │ │ ├── zk_master_select.py │ │ └── zk_node_ope.py ├── http_basic │ ├── __init__.py │ ├── flask_web │ │ ├── __init__.py │ │ ├── flask_restful.py │ │ ├── flask_resp.py │ │ ├── flask_file_svr.py │ │ ├── flask_auth.py │ │ ├── flask_error_handler.py │ │ ├── flask_basic_web.py │ │ └── flask_content_type.py │ ├── http_realize │ │ ├── __init__.py │ │ ├── http_realize_1 │ │ │ ├── __init__.py │ │ │ └── http_server_1.py │ │ ├── static_server │ │ │ ├── __init__.py │ │ │ ├── plain.html │ │ │ └── static_server.py │ │ ├── test_SimpleHTTPServer.py │ │ ├── http_svr_simple.py │ │ └── http_svr_basic_1.py │ ├── simple_rpc │ │ ├── __init__.py │ │ ├── rpc_client_1.py │ │ └── rpc_server_1.py │ ├── socket_basic │ │ ├── __init__.py │ │ ├── udp_sock │ │ │ ├── __init__.py │ │ │ ├── udp_client.py │ │ │ └── udp_server.py │ │ ├── SocketServer_basic.py │ │ └── basic_client.py │ ├── wsgi_demo │ │ ├── __init__.py │ │ └── wsgi_demo.py │ ├── http_client_get.py │ └── url_ope.py ├── machine_learn │ ├── __init__.py │ ├── PCA │ │ ├── __init__.py │ │ └── pca_basic.py │ ├── Bayes │ │ ├── __init__.py │ │ └── bayes_sklearn.py │ ├── cluster │ │ ├── __init__.py │ │ └── sk_cluster.py │ ├── knearest │ │ ├── __init__.py │ │ ├── knn_scratch.py │ │ └── knn_classify_sklearn.py │ ├── decision_tree │ │ ├── __init__.py │ │ ├── dt.png │ │ ├── tree.dot │ │ ├── create_data.py │ │ 
├── dtree_scratch.py │ │ └── dtree_sklearn.py │ ├── perception │ │ ├── __init__.py │ │ └── perception.py │ ├── linear_regression │ │ ├── __init__.py │ │ └── sk_example.py │ ├── logistic_regression │ │ ├── __init__.py │ │ ├── lr_sklearn_v1.py │ │ └── lr_scratch.py │ ├── neural_network_keras │ │ ├── __init__.py │ │ ├── lstm_nlp.py │ │ ├── cnn_keras_digits.py │ │ └── nn_keras_digits.py │ └── dataset │ │ ├── decision_tree │ │ └── data_banknote_authentication.txt │ │ ├── cluster │ │ └── cluster_txt │ │ ├── perception │ │ └── dataset.txt │ │ └── logistic_regression │ │ └── lr_ml_action.txt ├── netsocket │ ├── __init__.py │ ├── basic_socket.py │ └── ip_int.py ├── numpy_operate │ ├── __init__.py │ ├── structured_arr.py │ ├── arr_vectorize.py │ ├── flip_arr.py │ ├── zero_one_empty.py │ ├── broadcast_demo.py │ ├── array_multiply.py │ ├── np_distance.py │ ├── arr_equal_close.py │ ├── arr_sort.py │ ├── algebra_op.py │ ├── log2_op.py │ ├── idx_arrays.py │ ├── random_arr.py │ └── array_create.py ├── opencv_basic │ ├── __init__.py │ ├── path_var.py │ ├── cv_basic_op.py │ └── url_img_cv.py ├── thread_process │ ├── __init__.py │ ├── basic_process.py │ ├── sema_thread.py │ ├── basic_thread.py │ ├── thread_timer.py │ ├── multitread_profile.py │ ├── thread_condition.py │ ├── pool_dummy.py │ ├── pool_queue.py │ ├── thread_lock.py │ └── thread_queue.py ├── document │ ├── machine_learn │ │ ├── knearest │ │ │ └── README.md │ │ └── percepton │ │ │ ├── perception_plot.jpg │ │ │ ├── perception_ret.jpg │ │ │ └── README.md │ └── numpy_operate │ │ └── README.md ├── py_basic │ ├── __init__.py │ ├── arg_parse.py │ ├── MD5_sha.py │ ├── log_config.py │ ├── global_val.py │ ├── profile_ope.py │ ├── with_usage.py │ ├── collection_ope.py │ ├── except_ope.py │ ├── argparse_ope.py │ ├── tuple_operate.py │ ├── num_ope.py │ ├── argv_basic.py │ ├── decorator_basic.py │ ├── yield_ope.py │ ├── random_operator.py │ ├── decorator_set.py │ ├── dw_img_from_google.py │ ├── calendar_ope.py │ ├── operator_ope.py │ 
├── obj_is.py │ ├── base64_test.py │ ├── functional_program.py │ ├── kwargs_xargs.py │ ├── str_basic.py │ ├── set_ope.py │ └── time_ope.py └── sk_sc_pd_operator │ ├── __init__.py │ ├── sc_distance_ope.py │ ├── pd_str.py │ ├── sk_KFlod.py │ ├── pd_plot.py │ ├── sk_metric_accuracy.py │ ├── pd_feature2value.py │ ├── split_train_test_data.py │ ├── pd_index.py │ ├── pd_visualize_diamond.py │ ├── pd_concat_join.py │ ├── pd_GridSearchCV.py │ ├── pd_ope.py │ ├── pd_dummy_val.py │ ├── pd_date_time.py │ ├── pd_pivot.py │ ├── pd_miss_data.py │ └── sk_feature_process.py └── README.md /python_utils/matplot/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python_utils/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python_utils/DbService/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python_utils/al_lt_common/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python_utils/distributed/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python_utils/http_basic/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python_utils/machine_learn/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /python_utils/netsocket/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python_utils/numpy_operate/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python_utils/opencv_basic/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python_utils/DbService/mysql_db/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python_utils/DbService/redis_db/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python_utils/machine_learn/PCA/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python_utils/opencv_basic/path_var.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /python_utils/thread_process/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python_utils/DbService/sqlalchemy/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 
/python_utils/http_basic/flask_web/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python_utils/http_basic/http_realize/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python_utils/http_basic/simple_rpc/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python_utils/http_basic/socket_basic/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python_utils/http_basic/wsgi_demo/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python_utils/machine_learn/Bayes/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python_utils/machine_learn/cluster/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python_utils/machine_learn/knearest/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python_utils/distributed/zookeeper_demo/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python_utils/machine_learn/decision_tree/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python_utils/machine_learn/perception/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python_utils/document/machine_learn/knearest/README.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python_utils/http_basic/socket_basic/udp_sock/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python_utils/machine_learn/linear_regression/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python_utils/machine_learn/logistic_regression/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python_utils/http_basic/http_realize/http_realize_1/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python_utils/http_basic/http_realize/static_server/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python_utils/machine_learn/neural_network_keras/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 
/python_utils/machine_learn/dataset/decision_tree/data_banknote_authentication.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python_utils/py_basic/__init__.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | 3 | if __name__ == '__main__': 4 | 5 | pass 6 | -------------------------------------------------------------------------------- /python_utils/sk_sc_pd_operator/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | This package is common usage about sklearn, scipy, pandas library 3 | """ -------------------------------------------------------------------------------- /python_utils/DbService/config/mysql_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "host":"localhost","user":"root","pwd":"123456", 3 | "db":"springdemo","port":3306 4 | } -------------------------------------------------------------------------------- /python_utils/opencv_basic/cv_basic_op.py: -------------------------------------------------------------------------------- 1 | # _*_coding:utf-8 _*_ 2 | 3 | """ 4 | This file is basic operator about cv2 5 | """ 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /python_utils/machine_learn/decision_tree/dt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jayhello/python_utils/HEAD/python_utils/machine_learn/decision_tree/dt.png -------------------------------------------------------------------------------- /python_utils/document/machine_learn/percepton/perception_plot.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Jayhello/python_utils/HEAD/python_utils/document/machine_learn/percepton/perception_plot.jpg -------------------------------------------------------------------------------- /python_utils/document/machine_learn/percepton/perception_ret.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jayhello/python_utils/HEAD/python_utils/document/machine_learn/percepton/perception_ret.jpg -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # python_utils 2 | 1.This project is my common use/record of python. For some utils if we don't use for a long time,we may forget it. 3 | so I upload it to github.When I need to use some utils,I can get it directily from this project avoiding research form 4 | google. 5 | -------------------------------------------------------------------------------- /python_utils/http_basic/http_realize/static_server/plain.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Plain Page 6 | 7 | 8 |

Plain Page

9 |

Nothin' but HTML.

10 | 11 | -------------------------------------------------------------------------------- /python_utils/sk_sc_pd_operator/sc_distance_ope.py: -------------------------------------------------------------------------------- 1 | # _*_ coding:utf-8 _*_ 2 | 3 | from scipy.spatial import distance 4 | import numpy as np 5 | 6 | 7 | def eu_distance(): 8 | a1 = np.array([1, 2, 3]) 9 | a2 = np.array([3, 4, 5]) 10 | print distance.euclidean(a1, a2) 11 | # 3.46410161514 12 | 13 | if __name__ == '__main__': 14 | eu_distance() 15 | pass 16 | -------------------------------------------------------------------------------- /python_utils/document/numpy_operate/README.md: -------------------------------------------------------------------------------- 1 | sometimes you want numbered lists 2 | 3 | 1. one 4 | 2. two 5 | 6 | sometimes you want bullet points 7 | 8 | * start a line with a start 9 | * profit! 10 | 11 | Alternatively, 12 | 13 | - Dashes work just as well 14 | - And if you have sub points, put two spaces before the dash or star: 15 | - Like this 16 | - And this 17 | * hello world! 18 | -------------------------------------------------------------------------------- /python_utils/document/machine_learn/percepton/README.md: -------------------------------------------------------------------------------- 1 | ## The running result images 2 | 1. ![console result](https://github.com/Jayhello/python_utils/blob/master/python_utils/document/machine_learn/percepton/perception_ret.jpg) 3 | 2. 
![plot result](https://github.com/Jayhello/python_utils/blob/master/python_utils/document/machine_learn/percepton/perception_plot.jpg) 4 | 5 | ## Example of realize perception 6 | -------------------------------------------------------------------------------- /python_utils/thread_process/basic_process.py: -------------------------------------------------------------------------------- 1 | # _*_ coding:utf-8 _*_ 2 | 3 | import os 4 | import multiprocessing 5 | 6 | 7 | def get_process_id(): 8 | print os.getpid() 9 | # 8844 10 | print multiprocessing.current_process().pid 11 | # 8844 12 | print multiprocessing.current_process().name 13 | # MainProcess 14 | 15 | 16 | if __name__ == '__main__': 17 | get_process_id() 18 | -------------------------------------------------------------------------------- /python_utils/py_basic/arg_parse.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file shows basic usage of argparse. 3 | """ 4 | 5 | import argparse 6 | 7 | ap = argparse.ArgumentParser() 8 | ap.add_argument('-i', '--image', required=True, help='path to image file') 9 | ap.add_argument('-w', '--weights', default='./cnn_weights.dat', 10 | help='path to weights file') 11 | 12 | args = ap.parse_args() 13 | print args.image, args.weights 14 | -------------------------------------------------------------------------------- /python_utils/py_basic/MD5_sha.py: -------------------------------------------------------------------------------- 1 | # _*_ coding:utf-8 _*_ 2 | import md5 3 | import sha 4 | import hashlib 5 | 6 | 7 | def test_md5(): 8 | content = 'hello xy, are you ok?' 
9 | print hashlib.md5(content).hexdigest() 10 | # 180d5f07d511b660f320cf2a645f1f3b 11 | print hashlib.sha1(content).hexdigest() 12 | # c25884a4688c8b1a25a619f198f91f8661b2623b 13 | 14 | 15 | if __name__ == '__main__': 16 | test_md5() 17 | pass 18 | -------------------------------------------------------------------------------- /python_utils/http_basic/flask_web/flask_restful.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | 3 | from flask import Flask 4 | from flask import jsonify 5 | 6 | 7 | app = Flask(__name__) 8 | 9 | 10 | @app.route('/') 11 | def index(): 12 | return "hello world" 13 | 14 | 15 | @app.route('/idx') 16 | def index_js(): 17 | d = {"k": "hello world"} 18 | return jsonify(d) 19 | 20 | 21 | if __name__ == '__main__': 22 | app.run(host='0.0.0.0') 23 | pass 24 | -------------------------------------------------------------------------------- /python_utils/py_basic/log_config.py: -------------------------------------------------------------------------------- 1 | # _*_ coding:utf-8 _*_ 2 | import logging 3 | 4 | # log config, module name, line num, function name 5 | logging.basicConfig( 6 | format="%(asctime)s %(levelname)s %(module)s:%(lineno)s %(funcName)s %(threadName)s %(message)s", 7 | level=logging.DEBUG, 8 | datefmt='%Y-%m-%d %I:%M:%S' 9 | ) 10 | 11 | 12 | def test_log(): 13 | logging.info('hello world') 14 | 15 | if __name__ == '__main__': 16 | test_log() 17 | pass 18 | -------------------------------------------------------------------------------- /python_utils/py_basic/global_val.py: -------------------------------------------------------------------------------- 1 | # _*_coding: utf-8 _*_ 2 | """ 3 | test for global variable 4 | note that in multiprocess, every process has it's own 5 | global variable, so if the function fun2 is in subprocess 6 | it will still be 0 7 | """ 8 | g_dst_dir = '' 9 | 10 | g_val = 0 11 | 12 | 13 | def fun2(): 14 | print g_val 15 | 16 | 17 | def fun1(): 18 
| global g_val 19 | g_val = 3 20 | fun2() 21 | 22 | 23 | if __name__ == '__main__': 24 | fun1() 25 | pass 26 | -------------------------------------------------------------------------------- /python_utils/http_basic/http_realize/test_SimpleHTTPServer.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | 3 | import SimpleHTTPServer 4 | 5 | 6 | def test_translate_path(): 7 | url = "http://yy.com/ai/xy/" 8 | handler = SimpleHTTPServer.SimpleHTTPRequestHandler(None, None, None) 9 | 10 | print handler.translate_path(url) 11 | 12 | 13 | if __name__ == '__main__': 14 | 15 | # ----- test translate path ----- 16 | if 1: 17 | test_translate_path() 18 | # ----- end ----- 19 | 20 | pass 21 | -------------------------------------------------------------------------------- /python_utils/numpy_operate/structured_arr.py: -------------------------------------------------------------------------------- 1 | # _*_ coding:utf-8 _*_ 2 | import numpy as np 3 | 4 | 5 | def create_structured_arr(): 6 | dtype = [('name', 'S10'), ('height', float), ('age', int)] 7 | arr_val = [('Arthur', 1.8, 41), ('Lancelot', 1.9, 38), 8 | ('Galahad', 1.7, 38)] 9 | 10 | arr = np.array(arr_val, dtype=dtype) 11 | print np.sort(arr, order='height') 12 | pass 13 | 14 | 15 | if __name__ == '__main__': 16 | create_structured_arr() 17 | pass 18 | -------------------------------------------------------------------------------- /python_utils/distributed/zookeeper_demo/zk_failover_monitor.py: -------------------------------------------------------------------------------- 1 | from kazoo.client import KazooClient 2 | 3 | import time 4 | 5 | import logging 6 | logging.basicConfig() 7 | 8 | zk = KazooClient(hosts='127.0.0.1:2181') 9 | zk.start() 10 | 11 | # Determine if a node exists 12 | while True: 13 | if zk.exists("/test/failure_detection/worker"): 14 | print "the worker is alive!" 15 | else: 16 | print "the worker is dead!" 
17 | break 18 | time.sleep(3) 19 | 20 | zk.stop() 21 | -------------------------------------------------------------------------------- /python_utils/distributed/zookeeper_demo/zk_watch.py: -------------------------------------------------------------------------------- 1 | from kazoo.client import KazooClient 2 | import time 3 | 4 | import logging 5 | logging.basicConfig() 6 | 7 | zk = KazooClient(hosts='127.0.0.1:2181') 8 | zk.start() 9 | 10 | 11 | @zk.DataWatch('/test/zk1/node') 12 | def my_func(data, stat): 13 | if data: 14 | print "Data is %s" % data 15 | print "Version is %s" % stat.version 16 | else: 17 | print "data is not available" 18 | 19 | while True: 20 | time.sleep(10) 21 | 22 | zk.stop() 23 | -------------------------------------------------------------------------------- /python_utils/thread_process/sema_thread.py: -------------------------------------------------------------------------------- 1 | import time 2 | from random import random 3 | from threading import Thread, Semaphore 4 | 5 | sema = Semaphore(3) 6 | 7 | 8 | def foo(tid): 9 | with sema: 10 | print '{} acquire sema'.format(tid) 11 | wt = random() * 2 12 | time.sleep(wt) 13 | print '{} release sema'.format(tid) 14 | 15 | 16 | threads = [] 17 | for i in range(5): 18 | t = Thread(target=foo, args=(i,)) 19 | threads.append(t) 20 | t.start() 21 | for t in threads: 22 | t.join() 23 | -------------------------------------------------------------------------------- /python_utils/http_basic/simple_rpc/rpc_client_1.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | 3 | import xmlrpclib 4 | 5 | 6 | def test_client_1(): 7 | host = "http://localhost:8888/" 8 | proxy = xmlrpclib.ServerProxy(host) 9 | print "using proxy %s" % proxy 10 | 11 | print "3 is even %s" % str(proxy.is_even(3)) 12 | print "100 is even %s" % str(proxy.is_even(100)) 13 | 14 | 15 | if __name__ == '__main__': 16 | 17 | # ----- test simple rpv client ----- 18 | if 1: 19 | 
test_client_1() 20 | # ----- end ----- 21 | 22 | pass 23 | -------------------------------------------------------------------------------- /python_utils/distributed/zookeeper_demo/zk_failover_worker.py: -------------------------------------------------------------------------------- 1 | from kazoo.client import KazooClient 2 | import time 3 | 4 | import logging 5 | logging.basicConfig() 6 | 7 | zk = KazooClient(hosts='127.0.0.1:2181') 8 | zk.start() 9 | 10 | # Ensure a path, create if necessary 11 | zk.ensure_path("/test/failure_detection") 12 | 13 | # Create a node with data 14 | zk.create("/test/failure_detection/worker", 15 | value=b"a test value", ephemeral=True) 16 | 17 | while True: 18 | print "I am alive!" 19 | time.sleep(3) 20 | 21 | zk.stop() 22 | -------------------------------------------------------------------------------- /python_utils/py_basic/profile_ope.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | 3 | import cProfile 4 | 5 | 6 | def func1(): 7 | sum = 0 8 | for i in range(1000000): 9 | sum += i 10 | 11 | # 1 0.167 0.167 0.167 0.167 {range} 12 | # 4 function calls in 0.674 seconds 13 | 14 | 15 | def func2(): 16 | sum = 0 17 | for i in xrange(1000000): 18 | sum += i 19 | 20 | # 3 function calls in 0.350 seconds 21 | 22 | if __name__ == '__main__': 23 | # cProfile.run("func1()") 24 | cProfile.run("func2()") 25 | pass 26 | -------------------------------------------------------------------------------- /python_utils/http_basic/flask_web/flask_resp.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | 3 | from flask import Flask 4 | from flask import Response 5 | import json 6 | 7 | 8 | app = Flask(__name__) 9 | 10 | 11 | @app.route('/hello', methods=['GET']) 12 | def api_hello(): 13 | data = {'name': 'xy', 'greet': "hello"} 14 | js_str = json.dumps(data) 15 | 16 | resp = Response(js_str, status=200, mimetype='application/json') 17 | 
resp.headers['Link'] = 'http://xy.com' 18 | 19 | return resp 20 | 21 | 22 | if __name__ == '__main__': 23 | app.run(host='0.0.0.0') 24 | pass 25 | -------------------------------------------------------------------------------- /python_utils/py_basic/with_usage.py: -------------------------------------------------------------------------------- 1 | # _*_ coding:utf-8 _*_ 2 | 3 | 4 | class Sample(object): 5 | def __enter__(self): 6 | print 'in __enter__' 7 | return "Foo" 8 | 9 | def __exit__(self, exc_type, exc_val, exc_tb): 10 | print 'in __exit__' 11 | 12 | 13 | def get_sample(): 14 | return Sample() 15 | 16 | 17 | def test_with(): 18 | with get_sample() as sp: 19 | print 'Sample: ', sp 20 | 21 | # in __enter__ 22 | # Sample: Foo 23 | # in __exit__ 24 | 25 | if __name__ == '__main__': 26 | test_with() 27 | pass 28 | -------------------------------------------------------------------------------- /python_utils/netsocket/basic_socket.py: -------------------------------------------------------------------------------- 1 | # _*_ coding:utf-8 _*_ 2 | """ 3 | This file is about some basic operator of socket 4 | """ 5 | 6 | 7 | import socket 8 | 9 | 10 | def get_ip(): 11 | """ 12 | local host ip not 127.0.0.1 13 | socket.gethostbyname(socket.gethostname()) will return 127.0.0.1 14 | :return: 15 | """ 16 | s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) 17 | s.connect(("gmail.com", 80)) 18 | # s.getsockname() -> ip:port 19 | print s.getsockname()[0] 20 | s.close() 21 | 22 | 23 | if __name__ == '__main__': 24 | get_ip() 25 | -------------------------------------------------------------------------------- /python_utils/py_basic/collection_ope.py: -------------------------------------------------------------------------------- 1 | # _*_ coding:utf-8 _*_ 2 | 3 | """ 4 | operator about collection 5 | """ 6 | 7 | from collections import Counter 8 | 9 | 10 | def counter_usage(): 11 | lst = ['class_1', 'class_2', 'class_1', 'class_1', 'class_1', 'class_2'] 12 | 13 | print 
Counter(lst).most_common() 14 | # [('class_1', 4), ('class_2', 2)] 15 | 16 | print Counter(lst).most_common(1) 17 | # [('class_1', 4)] 18 | 19 | print Counter(lst).most_common(1)[0][0] 20 | # class_1 21 | 22 | 23 | if __name__ == '__main__': 24 | counter_usage() 25 | pass 26 | -------------------------------------------------------------------------------- /python_utils/py_basic/except_ope.py: -------------------------------------------------------------------------------- 1 | # _*_ coding:utf-8 _*_ 2 | 3 | 4 | def test_except_scope(): 5 | try: 6 | v = 'test scope' 7 | raise Exception 8 | except Exception as e: 9 | # if v is locals(): 10 | # print v 11 | print v 12 | 13 | 14 | def test_except(): 15 | try: 16 | raise 7 17 | except Exception as e: 18 | print e 19 | # exceptions must be old-style classes or derived from BaseException, not int 20 | 21 | 22 | if __name__ == '__main__': 23 | # test_except_scope() 24 | test_except() 25 | pass 26 | -------------------------------------------------------------------------------- /python_utils/http_basic/simple_rpc/rpc_server_1.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from SimpleXMLRPCServer import SimpleXMLRPCServer 4 | 5 | 6 | def is_even(n): 7 | return n % 2 8 | 9 | 10 | def test_server_1(): 11 | port = 8888 12 | rpc_server = SimpleXMLRPCServer(("localhost", port)) 13 | print 'now listening in %s' % port 14 | 15 | rpc_server.register_function(is_even, "is_even") 16 | rpc_server.serve_forever() 17 | 18 | 19 | if __name__ == '__main__': 20 | 21 | # -----test rpc server 1----- 22 | if 1: 23 | test_server_1() 24 | # ----- end ----- 25 | 26 | pass 27 | -------------------------------------------------------------------------------- /python_utils/distributed/zookeeper_demo/zk_lock_demo.py: -------------------------------------------------------------------------------- 1 | # _*_ coding:utf-8 _*_ 2 | 3 | from kazoo.client import KazooClient 4 | import time 5 
| import uuid 6 | import logging 7 | logging.basicConfig() 8 | 9 | my_id = uuid.uuid4() 10 | 11 | 12 | def work(): 13 | print "{} is working! ".format(str(my_id)) 14 | 15 | 16 | zk = KazooClient(hosts='127.0.0.1:2181') 17 | zk.start() 18 | 19 | lock = zk.Lock("/lockpath", str(my_id)) 20 | 21 | print "I am {}".format(str(my_id)) 22 | 23 | while True: 24 | with lock: 25 | work() 26 | time.sleep(3) 27 | 28 | zk.stop() 29 | 30 | if __name__ == '__main__': 31 | 32 | pass 33 | -------------------------------------------------------------------------------- /python_utils/distributed/zookeeper_demo/zk_master_select.py: -------------------------------------------------------------------------------- 1 | from kazoo.client import KazooClient 2 | import time 3 | import uuid 4 | 5 | import logging 6 | logging.basicConfig() 7 | 8 | my_id = uuid.uuid4() 9 | 10 | 11 | def leader_func(): 12 | print "I am the leader {}".format(str(my_id)) 13 | while True: 14 | print "{} is working! ".format(str(my_id)) 15 | time.sleep(3) 16 | 17 | zk = KazooClient(hosts='127.0.0.1:2181') 18 | zk.start() 19 | 20 | election = zk.Election("/electionpath") 21 | 22 | # blocks until the election is won, then calls 23 | # leader_func() 24 | election.run(leader_func) 25 | 26 | zk.stop() 27 | -------------------------------------------------------------------------------- /python_utils/sk_sc_pd_operator/pd_str.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | 3 | import numpy as np 4 | import pandas as pd 5 | 6 | 7 | def series_str(): 8 | data = ['peter', 'Paul', None, 'MARY', 'gUIDO'] 9 | names = pd.Series(data) 10 | print names.str.capitalize() 11 | # 0 Peter 12 | # 1 Paul 13 | # 2 None 14 | # 3 Mary 15 | # 4 Guido 16 | # dtype: object 17 | 18 | print names.str.startswith('p') 19 | # 0 True 20 | # 1 False 21 | # 2 None 22 | # 3 False 23 | # 4 False 24 | # dtype: object 25 | 26 | if __name__ == '__main__': 27 | series_str() 28 | pass 29 | 
def udp_client():
    """Send one maximum-size UDP datagram to a local echo server and
    print how many bytes come back.

    Performs real network I/O: requires udp_server.py listening on
    localhost:8888.

    Fixes: the original built ``'a' * 1024 * 65`` (66560 bytes) and
    immediately overwrote it -- that dead store is removed.  The payload
    is now an explicit bytes literal (required by Python 3 sendto(), and
    identical to the Python 2 str), and prints use the function form.
    """
    host, port = "localhost", 8888

    sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    # 65507 is the largest UDP payload over IPv4:
    # 65535 (IP total length) - 20 (IP header) - 8 (UDP header).
    msg = b'a' * 65507

    sock.sendto(msg, (host, port))

    data, server_add = sock.recvfrom(1024 * 64)
    print('rcv from %s: %s' % (server_add, len(data)))
def my_fun(a, b):
    """Return a - b when a >= b, otherwise a + b (scalar helper for the
    np.vectorize demo below)."""
    if a >= b:
        return a - b
    else:
        return a + b


def test_vectorize():
    """Demo np.vectorize: apply a scalar Python function elementwise
    over an array, with broadcasting of the scalar second argument.

    Python 3 fix: print statements replaced with the print() function.
    """
    v_fun = np.vectorize(my_fun)
    arr = np.arange(8).reshape(2, 4)
    print(arr)
    # [[0 1 2 3]
    #  [4 5 6 7]]
    print(v_fun(arr, 4))
    # [[4 5 6 7]
    #  [0 1 2 3]]

    squarer = lambda t: t ** 2
    v_fun = np.vectorize(squarer)
    print(v_fun(arr))
    # [[ 0  1  4  9]
    #  [16 25 36 49]]
def flip_arr():
    """Demo of numpy array reversal: fliplr / flipud and their
    equivalent slice spellings.

    Python 3 fix: print statements replaced with the print() function.
    """
    arr = np.arange(6).reshape(2, 3)
    print(arr)
    # [[0 1 2]
    #  [3 4 5]]
    print(np.fliplr(arr))       # reverse column order
    # [[2 1 0]
    #  [5 4 3]]
    print(arr[:, ::-1])         # same thing via slicing
    # [[2 1 0]
    #  [5 4 3]]

    print(np.flipud(arr))       # reverse row order
    # [[3 4 5]
    #  [0 1 2]]
    print(arr[::-1])            # same thing via slicing
    # [[3 4 5]
    #  [0 1 2]]

    arr2 = np.arange(8).reshape((2, 2, 2))
    print(arr2)
    # [[[0 1]
    #   [2 3]]
    #
    #  [[4 5]
    #   [6 7]]]
def get_mean():
    """Demo of numpy mean along rows, columns, and the whole matrix.

    Python 3 fix: print statements replaced with the print() function.
    """
    mt = np.array([[3, 1], [-1, 3]])
    m_1 = np.mean(mt[0, :])   # row 0 mean -> 2.0
    m_2 = np.mean(mt[1, :])   # row 1 mean -> 1.0
    m = np.mean(mt)           # grand mean over all elements -> 1.5
    print(m_1)
    print(m_2)
    print(m)

    c_1 = np.mean(mt[:, 0])   # column 0 mean -> 1.0
    c_2 = np.mean(mt[:, 1])   # column 1 mean -> 2.0
    print(c_1)
    print(c_2)
    m = np.mean(mt, axis=0)   # per-column means -> [1. 2.]
    # m = np.mean(mt, axis=(0, 1))
    print(m)

Hello World

12 | 13 | 14 | ''' 15 | 16 | def do_GET(self): 17 | self.send_response(200) 18 | self.send_header("Content-type", "text/html") 19 | self.send_header("Content-Length", str(len(self.Page))) 20 | self.end_headers() 21 | self.wfile.write(self.Page) 22 | 23 | 24 | if __name__ == '__main__': 25 | serverAddress = ('', 8888) 26 | server = BaseHTTPServer.HTTPServer(serverAddress, RequestHandler) 27 | server.serve_forever() 28 | -------------------------------------------------------------------------------- /python_utils/http_basic/socket_basic/SocketServer_basic.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | from SocketServer import TCPServer, StreamRequestHandler 3 | 4 | RESPONSE = b"""\ 5 | HTTP/1.1 200 OK 6 | Content-type: text/html 7 | Content-length: 15 8 | 9 |

Hello!

""".replace(b"\n", b"\r\n") 10 | 11 | 12 | class MyHandler1(StreamRequestHandler): 13 | """process tcp server, and send http response back""" 14 | def handle(self): 15 | addr = self.request.getpeername() 16 | print 'get connection from %s, %s ' % addr 17 | self.wfile.write(RESPONSE) 18 | 19 | 20 | def test_handler1(): 21 | # http server 22 | server = TCPServer(('', 8888), MyHandler1) 23 | server.serve_forever() 24 | 25 | 26 | if __name__ == '__main__': 27 | test_handler1() 28 | pass 29 | -------------------------------------------------------------------------------- /python_utils/http_basic/flask_web/flask_error_handler.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | 3 | 4 | from flask import Flask 5 | from flask import request 6 | from flask import jsonify 7 | 8 | app = Flask(__name__) 9 | 10 | 11 | @app.errorhandler(404) 12 | def not_found(error=None): 13 | msg = {'status': 404, 'message': 'Not Found: ' + request.url} 14 | 15 | resp = jsonify(msg) 16 | resp.status_code = 404 17 | 18 | return resp 19 | 20 | 21 | @app.route('/users/', methods=['GET']) 22 | def api_users(userid): 23 | users = {'1': 'xy1', '2': 'xy2', '3': 'xy3'} 24 | 25 | if userid in users: 26 | return jsonify({userid: users[userid]}) 27 | else: 28 | return not_found() 29 | 30 | if __name__ == '__main__': 31 | app.run(host='0.0.0.0') 32 | 33 | pass 34 | -------------------------------------------------------------------------------- /python_utils/distributed/zookeeper_demo/zk_node_ope.py: -------------------------------------------------------------------------------- 1 | from kazoo.client import KazooClient 2 | 3 | import logging 4 | logging.basicConfig() 5 | 6 | zk = KazooClient(hosts='127.0.0.1:2181') 7 | zk.start() 8 | 9 | # Ensure a path, create if necessary 10 | zk.ensure_path("/test/zk1") 11 | 12 | # Create a node with data 13 | zk.create("/test/zk1/node", b"a test value11") 14 | 15 | # Determine if a node exists 16 | if 
def tuple_shift_left(tup, n):
    """
    shift tuple over by n indices
    :param tup: like (1,2,3,4)
    :param n: 1
    :return: (2, 3, 4, 1)
    """
    if n < 0:
        raise ValueError('n must be a positive integer')
    # empty tuple or zero shift: nothing to rotate
    if not (tup and n):
        return tup
    offset = n % len(tup)  # shifts larger than len wrap around
    return tup[offset:] + tup[:offset]
def print_binary():
    """Print 5 and -5 in binary, via str.format and bin().

    Python 3 fix: the Python-2-only ``print bin(m), bin(n)`` statement
    becomes ``print(bin(m), bin(n))``, which emits the same
    space-separated line under Python 3.
    """
    m, n = 5, -5
    print('{0:b}'.format(m))   # 101
    print('{0:b}'.format(n))   # -101  (sign + magnitude, not two's complement)

    print(bin(m), bin(n))      # 0b101 -0b101
def url_img_cv():
    """Fetch an image over HTTP and display it with OpenCV.

    Syntax fix: ``except Exception, e`` is Python-2-only and a
    SyntaxError under Python 3; ``except Exception as e`` is valid on
    both (since Python 2.6).  Prints use the function form.

    NOTE(review): ``urllib.urlopen`` is the Python 2 API; under Python 3
    this would need ``urllib.request.urlopen`` -- left unchanged here to
    avoid altering the module-level imports outside this block.
    """
    url = "http://yysnapshot.bs2src9.yy.com/636be6fc25410c5208d4c4ba5a22e2365768ec52?height=960&interval=12465&file=636be6fc25410c5208d4c4ba5a22e2365768ec52&width=544&bucket=yysnapshot&yid=7399736121338363914&day=20170817"
    try:
        url_response = urllib.urlopen(url)
        # decode the raw HTTP body into an OpenCV image;
        # flag -1 keeps the source channels/depth as-is
        img_array = np.array(bytearray(url_response.read()), dtype=np.uint8)
        img = cv2.imdecode(img_array, -1)
        cv2.imshow('URL Image', img)
        cv2.waitKey()
    except Exception as e:
        print(e)
    finally:
        print('no use line, nothing to be cleared')
        # can't return None in this scope, because this file is certainly to be executed
        # return None
return self.nickname + " " + self.password 25 | 26 | 27 | def query_orm(): 28 | session = get_db_session() 29 | user = session.query(User).first() 30 | print user.name_pwd 31 | 32 | if __name__ == '__main__': 33 | query_orm() 34 | -------------------------------------------------------------------------------- /python_utils/http_basic/flask_web/flask_basic_web.py: -------------------------------------------------------------------------------- 1 | # _*_coding:utf-8 _*_ 2 | 3 | from flask import Flask 4 | from flask import abort 5 | from flask import redirect 6 | from flask import request 7 | from flask import Response 8 | 9 | 10 | app = Flask(__name__) 11 | 12 | 13 | @app.route('/') 14 | def index(): 15 | return '

from win10 slow machine' 16 | 17 | 18 | @app.route('/user/') 19 | def say_hello(name): 20 | return '

hello, %s

' % name 21 | 22 | 23 | @app.route('/paras/') 24 | def multi_paras(): 25 | ret_str = '' 26 | for para in request.args: 27 | print para, request.args[para] 28 | ret_str += para 29 | 30 | return '

multi_paras, %s

def get_data():
    """Generate a reproducible noisy linear dataset: y = 2x - 5 + noise.

    :return: (x, y) -- two float arrays of 50 samples each
    """
    rng = np.random.RandomState(1)  # fixed seed -> deterministic samples
    x = 10 * rng.rand(50)
    y = 2 * x - 5 + rng.randn(50)
    # plt.scatter(x, y)
    # plt.show()
    return x, y


def lr_fit():
    """Fit sklearn LinearRegression on the synthetic data, print the
    learned slope/intercept, and plot the fitted line.

    Python 3 fix: print statements replaced with the print() function.
    """
    x, y = get_data()
    model = LinearRegression(fit_intercept=True)
    model.fit(x[:, np.newaxis], y)  # sklearn expects a 2-D feature matrix
    xfit = np.linspace(0, 10, 1000)
    yfit = model.predict(xfit[:, np.newaxis])

    print("Model slope: ", model.coef_[0])
    print("Model intercept:", model.intercept_)

    plt.scatter(x, y)
    plt.plot(xfit, yfit)
    plt.show()
def draw_result(lst_iter, lst_loss, lst_acc, title):
    """Plot loss and accuracy curves against iteration count and save
    the figure as "<title>.png".

    :param lst_iter: x-axis values (iteration numbers)
    :param lst_loss: loss value per iteration (blue line)
    :param lst_acc: accuracy value per iteration (red line)
    :param title: figure title, also used as the output file name
    """
    plt.plot(lst_iter, lst_loss, '-b', label='loss')
    plt.plot(lst_iter, lst_acc, '-r', label='accuracy')

    plt.xlabel("n iteration")
    plt.legend(loc='upper left')
    plt.title(title)
    plt.savefig(title + ".png")  # must precede show(): show() clears the figure

    plt.show()


def test_draw():
    """Drive draw_result with a synthetic quadratic loss/accuracy pair.

    Python 3 fix: ``xrange()`` does not exist in Python 3; ``range()``
    produces identical values inside the comprehensions.
    """
    lst_iter = range(100)
    lst_loss = [0.01 * i - 0.01 * i ** 2 for i in range(100)]
    lst_acc = [0.01 * i + 0.01 * i ** 2 for i in range(100)]
    draw_result(lst_iter, lst_loss, lst_acc, "sgd_method")
int") 20 | sys.exit() 21 | else: 22 | msg = sys.argv[4] 23 | for arg in sys.argv[5:]: 24 | msg += " " 25 | msg += arg 26 | kv["id"] = id 27 | kv["alarm"] = 1 28 | kv["msg"] = msg.replace("'", "") 29 | 30 | if __name__ == '__main__': 31 | # config script para 32 | print sys.argv 33 | # ['E:/git_code/python_utils/py_basic/argv_basic.py', '1', '2', '3'] 34 | print len(sys.argv) 35 | # 4 36 | # parse_js() 37 | -------------------------------------------------------------------------------- /python_utils/py_basic/decorator_basic.py: -------------------------------------------------------------------------------- 1 | # _*_ coding:utf-8 _*_ 2 | 3 | """ 4 | python @ usage example 5 | """ 6 | 7 | 8 | def hello(fn): 9 | def wrapper(): 10 | print "hello, %s" % fn.__name__ 11 | fn() 12 | print 'bye, %s' % fn.__name__ 13 | return wrapper 14 | 15 | 16 | def do_nothing(fn): 17 | def wrapper(): 18 | print 'do not exe fn' 19 | return wrapper 20 | 21 | 22 | @hello 23 | def foo(): 24 | print 'I am foo' 25 | # hello, foo 26 | # I am foo 27 | # bye, foo 28 | 29 | 30 | @do_nothing 31 | def foo_nothing(): 32 | print 'I am foo_nothing' 33 | # do not exe fn 34 | 35 | 36 | @do_nothing 37 | @hello 38 | def foo_nested(): 39 | print 'I am foo_nested' 40 | # do not exe fn 41 | 42 | 43 | @hello 44 | @do_nothing 45 | def foo_nested_v2(): 46 | print 'I am foo_nested_v2' 47 | 48 | 49 | if __name__ == '__main__': 50 | # foo() 51 | # foo_nothing() 52 | # foo_nested() 53 | foo_nested_v2() 54 | pass 55 | -------------------------------------------------------------------------------- /python_utils/numpy_operate/broadcast_demo.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | 3 | import numpy as np 4 | 5 | 6 | def broadcast_demo(): 7 | arr = np.ones((3, 4)) 8 | print arr 9 | print arr + 1 10 | 11 | b = np.broadcast(arr, 1) 12 | print b.shape 13 | # (3L, 4L) 14 | 15 | 16 | def bc_demo_2(): 17 | a = np.array([1.0, 2.0, 3]) 18 | b = np.ones(3) * 2 
19 | print b 20 | # [ 2. 2. 2.] 21 | print a * b 22 | # [ 2. 4. 6.] 23 | 24 | b = 2 25 | print a * b 26 | # [ 2. 4. 6.] 27 | 28 | x = np.arange(4).reshape(4, 1) 29 | print x 30 | # [[0] 31 | # [1] 32 | # [2] 33 | # [3]] 34 | y = np.ones(5) 35 | print y 36 | # [ 1. 1. 1. 1. 1.] 37 | z = x + y 38 | print z 39 | # [[ 1. 1. 1. 1. 1.] 40 | # [ 2. 2. 2. 2. 2.] 41 | # [ 3. 3. 3. 3. 3.] 42 | # [ 4. 4. 4. 4. 4.]] 43 | print z.shape 44 | # (4L, 5L) 45 | 46 | if __name__ == '__main__': 47 | # broadcast_demo() 48 | bc_demo_2() 49 | pass 50 | -------------------------------------------------------------------------------- /python_utils/machine_learn/decision_tree/tree.dot: -------------------------------------------------------------------------------- 1 | digraph Tree { 2 | node [shape=box] ; 3 | 0 [label="loan level <= 0.5\ngini = 0.48\nsamples = 15\nvalue = [6, 9]"] ; 4 | 1 [label="has work <= 0.5\ngini = 0.32\nsamples = 5\nvalue = [4, 1]"] ; 5 | 0 -> 1 [labeldistance=2.5, labelangle=45, headlabel="True"] ; 6 | 2 [label="gini = 0.0\nsamples = 4\nvalue = [4, 0]"] ; 7 | 1 -> 2 ; 8 | 3 [label="gini = 0.0\nsamples = 1\nvalue = [0, 1]"] ; 9 | 1 -> 3 ; 10 | 4 [label="own house <= 0.5\ngini = 0.32\nsamples = 10\nvalue = [2, 8]"] ; 11 | 0 -> 4 [labeldistance=2.5, labelangle=-45, headlabel="False"] ; 12 | 5 [label="age <= 1.5\ngini = 0.4444\nsamples = 6\nvalue = [2, 4]"] ; 13 | 4 -> 5 ; 14 | 6 [label="has work <= 0.5\ngini = 0.4444\nsamples = 3\nvalue = [2, 1]"] ; 15 | 5 -> 6 ; 16 | 7 [label="gini = 0.0\nsamples = 2\nvalue = [2, 0]"] ; 17 | 6 -> 7 ; 18 | 8 [label="gini = 0.0\nsamples = 1\nvalue = [0, 1]"] ; 19 | 6 -> 8 ; 20 | 9 [label="gini = 0.0\nsamples = 3\nvalue = [0, 3]"] ; 21 | 5 -> 9 ; 22 | 10 [label="gini = 0.0\nsamples = 4\nvalue = [0, 4]"] ; 23 | 4 -> 10 ; 24 | } -------------------------------------------------------------------------------- /python_utils/DbService/sqlalchemy/basic.py: -------------------------------------------------------------------------------- 1 | # 
def get_db_session():
    """Build a SQLAlchemy session from the JSON mysql config file.

    Python 3 fix: print statements replaced with the print() function.

    NOTE(review): the config path is relative, so this only works when
    run from this file's directory -- confirm against callers.

    :return: a new Session bound to a mysql+mysqldb engine
    """
    db_config_file = '../config/mysql_config.json'
    db_js_data = get_json_from_file(db_config_file)
    # config JSON must supply the user/pwd/host/db keys used below
    db_connect = 'mysql+mysqldb://{user}:{pwd}@{host}/{db}?charset=utf8'.format(**db_js_data)
    print(db_connect)
    # mysql+mysqldb://root:123@localhost/springdemo?charset=utf8
    engine = create_engine(db_connect, echo=True)
    session = sessionmaker(bind=engine)
    return session()


def query_example():
    """Run two raw SQL probes through the session (requires a live MySQL)."""
    session = get_db_session()
    print(session.execute('show databases').fetchall())
    # [(u'springdemo',), (u'test',), (u'world',)]
    print(session.execute('select * from tb_yylive_news where id = 1').first())
    # (1L, u'http://www.bbc.com')
32 | plt.scatter(centers[:, 0], centers[:, 1], c='black', s=200, alpha=0.5) 33 | plt.show() 34 | 35 | if __name__ == '__main__': 36 | # get_data() 37 | predict() 38 | pass 39 | -------------------------------------------------------------------------------- /python_utils/http_basic/http_realize/http_svr_basic_1.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | 3 | import socket 4 | import BaseHTTPServer 5 | import time 6 | 7 | 8 | HOST = "127.0.0.1" 9 | PORT = 8888 10 | 11 | RESPONSE = b"""\ 12 | HTTP/1.1 200 OK 13 | Content-type: text/html 14 | Content-length: 15 15 | 16 |

Hello!

""".replace(b"\n", b"\r\n") 17 | 18 | 19 | RESPONSE = 'a' * (1024 * 1) 20 | 21 | 22 | def test_simple(): 23 | server_sock = socket.socket() 24 | server_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) 25 | server_sock.bind((HOST, PORT)) 26 | server_sock.listen(0) 27 | print "Listening on %s:%s..." % (HOST, PORT) 28 | 29 | while 1: 30 | client_sock, client_addr = server_sock.accept() 31 | print "New connection from %s:%s." % (client_addr) 32 | # client_sock.sendall(RESPONSE) 33 | time.sleep(12) 34 | 35 | data = client_sock.recv(1024) 36 | print "recv :%s" % data 37 | 38 | n = client_sock.send(RESPONSE) 39 | print "just send %s bytes" % n 40 | 41 | 42 | if __name__ == '__main__': 43 | test_simple() 44 | pass 45 | -------------------------------------------------------------------------------- /python_utils/sk_sc_pd_operator/sk_metric_accuracy.py: -------------------------------------------------------------------------------- 1 | # _*_ coding:utf-8 _*_ 2 | 3 | import numpy as np 4 | from sklearn.metrics import accuracy_score, confusion_matrix 5 | from sklearn.metrics import classification_report 6 | 7 | 8 | def accuracy_score_demo(): 9 | y_true = [0, 1, 2, 3, 2, 6] 10 | y_pred = [0, 2, 1, 3, 4, 7] 11 | print accuracy_score(y_true, y_pred) 12 | # 0.5 13 | print confusion_matrix(y_true, y_pred) 14 | 15 | y_true = [0, 1, 0, 1, 1, 0, 1, 0, 1] 16 | y_pred = [0, 0, 1, 0, 0, 0, 1, 1, 0] 17 | print confusion_matrix(y_true, y_pred) 18 | # [[2 2] 四个 0 两个被识别为了 0, 两个被识别为 1 19 | # [4 1]] 五个 1 四个被识别为了 0, 一个被识别为 1 20 | 21 | 22 | def classification_report_demo(): 23 | y_pred = [0, 0, 2, 1, 0] 24 | y_true = [0, 1, 2, 2, 0] 25 | target_names = ['class 0', 'class 1', 'class 2'] 26 | print classification_report(y_true, y_pred, target_names=target_names) 27 | 28 | print confusion_matrix(y_true, y_pred) 29 | # [[2 0 0] 30 | # [1 0 0] 31 | # [0 1 1]] 32 | 33 | if __name__ == '__main__': 34 | classification_report_demo() 35 | # accuracy_score_demo() 36 | pass 37 | 
def test_timer_with_para():
    """Timer test *with* a parameter.

    (The original docstring was a copy-paste of the no-parameter case.)
    Schedules ``hello_2(name)`` to run once after 5 seconds on a timer
    thread, then the thread exits.

    :return: the started ``threading.Timer`` so the caller can
        ``cancel()`` or ``join()`` it (previously nothing was returned).
    """
    name = 'bear fish'
    t = threading.Timer(5, hello_2, [name])
    # the callback runs exactly once, 5 seconds after start()
    t.start()
    return t
def fib(n):
    """Return the n-th Fibonacci number.

    Deliberately naive double recursion: this function exists to burn
    CPU for the GIL/threading benchmark, so it must stay slow.
    """
    return 1 if n <= 2 else fib(n - 1) + fib(n - 2)
def one_dim_arr_multiply():
    """Demo of ``*`` (elementwise) vs ``np.dot`` (inner product) on 1-D arrays.

    Fixed for Python 3: the original used Python-2-only ``print``
    statements.  Also returns the inner product so the demo is testable.

    :return: the inner product of the two fixed demo vectors (11)
    """
    arr1 = np.array([1, 2])
    arr2 = np.array([3, 4])
    print(arr1 * arr2)  # elementwise -> [3 8]
    # for 1-D arrays np.dot is the vector inner product;
    # .transpose() is a no-op on a 1-D array, so both calls agree
    print(np.dot(arr1, arr2.transpose()))  # 11
    print(arr1, arr2.transpose())
    inner = np.dot(arr1, arr2)  # 11
    print(inner)
    return inner
def mul_dim_arr_multiply():
    """Demo of ``*`` vs ``np.dot`` on 2-D (column-vector) arrays.

    Bug fix: the original ended with ``np.dot(arr1, arr2)`` on two
    (2, 1) arrays, which always raises ValueError and crashed the demo
    (its own trailing comment documented the crash).  The error is now
    caught and reported so the function runs to completion.  Prints are
    Python-3 compatible.
    """
    arr1 = np.array([[1], [2]])
    arr2 = np.array([[3], [4]])
    print(arr1 * arr2)
    # elementwise:
    # [[3]
    #  [8]]
    print(np.dot(arr1, arr2.transpose()))
    # (2,1) x (1,2) matrix product:
    # [[3 4]
    #  [6 8]]
    try:
        print(np.dot(arr1, arr2))
    except ValueError as err:
        # shapes (2,1) and (2,1) are not aligned: 1 (dim 1) != 2 (dim 0)
        print(err)

Hello!

def euclidean_distance():
    """Compute the Euclidean distance between two fixed demo vectors.

    Bug fix: the original printed ``np.square(a1, a2)``, where the
    second positional argument of ``np.square`` is the *out* buffer —
    that call clobbered ``a2`` with ``a1**2`` and never computed a
    distance.  Now computes sqrt(sum((a1 - a2)**2)).

    :return: the Euclidean distance (~3.4641 for the demo vectors)
    """
    a1 = np.array([1, 2, 3])
    a2 = np.array([3, 4, 5])
    dist = np.sqrt(np.sum(np.square(a1 - a2)))
    print(dist)  # 3.46410161514
    return dist
def eu_distance(a1=(1, 2, 3), a2=(3, 4, 5)):
    """Pure-Python Euclidean distance between two equal-length sequences.

    Generalized: the two vectors are now parameters (defaults keep the
    original demo values, so existing no-argument calls behave the
    same).  Print is Python-3 compatible and the distance is returned.

    :param a1: first vector (any sequence of numbers)
    :param a2: second vector, same length as ``a1``
    :return: the Euclidean distance
    """
    from math import sqrt
    dist = sqrt(sum((a - b) ** 2 for a, b in zip(a1, a2)))
    print(dist)  # 3.46410161514 for the default vectors
    return dist
def fibonacci(n):
    """Generate the first *n* Fibonacci numbers: 1, 1, 2, 3, 5, ..."""
    prev, cur = 0, 1
    for _ in range(n):
        yield cur
        prev, cur = cur, prev + cur
def int_ip2str(int_ip=3232235876):
    """Convert a 32-bit integer IP address to dotted-quad notation.

    Fixed for Python 3: ``xrange`` no longer exists.  The O(n^2)
    ``insert(0, ...)`` loop is replaced by a comprehension that walks
    the octets from the most significant byte down.

    :param int_ip: IPv4 address as an unsigned 32-bit integer
    :return: dotted-quad string, e.g. ``'192.168.1.100'``
    """
    octets = [str((int_ip >> shift) & 0xff) for shift in (24, 16, 8, 0)]
    return ".".join(octets)
29 | 30 | 31 | 32 |
def generate_random_num_str(length):
    """Return a random string of ASCII letters of the given length.

    Bug fix: ``string.letters`` existed only in Python 2 (and was
    locale-dependent); it is gone in Python 3.  ``getattr`` keeps the
    old behaviour on Python 2 and falls back to the portable
    ``string.ascii_letters`` elsewhere.

    :param length: number of random letters to produce
    :return: random letter string (empty string for length 0)
    """
    letters = getattr(string, 'letters', string.ascii_letters)
    return ''.join(random.choice(letters) for _ in range(length))
def locked_method(method):
    """Method decorator. Requires a lock object at self._lock."""
    def newmethod(self, *args, **kwargs):
        with self._lock:
            return method(self, *args, **kwargs)
    return newmethod


class DecoratorLockedSet(set):
    """A set whose add()/remove() are serialized via a decorator-held lock."""

    def __init__(self, *args, **kwargs):
        self._lock = Lock()
        super(DecoratorLockedSet, self).__init__(*args, **kwargs)

    @locked_method
    def add(self, *args, **kwargs):
        # BUG FIX: forward the call arguments.  The original called
        # super().add(args, kwargs), which added the (args, kwargs)
        # tuple itself to the set instead of the element.
        return super(DecoratorLockedSet, self).add(*args, **kwargs)

    @locked_method
    def remove(self, *args, **kwargs):
        # BUG FIX: same argument-forwarding defect as add().
        return super(DecoratorLockedSet, self).remove(*args, **kwargs)


class LockedSet(set):
    """A set where add(), remove(), and 'in' operator are thread-safe"""

    def __init__(self, *args, **kwargs):
        self._lock = Lock()
        super(LockedSet, self).__init__(*args, **kwargs)

    def add(self, elem):
        with self._lock:
            super(LockedSet, self).add(elem)

    def remove(self, elem):
        with self._lock:
            super(LockedSet, self).remove(elem)

    def __contains__(self, elem):
        with self._lock:
            # BUG FIX: the result must be returned; without the return
            # every membership test evaluated to None (always falsy).
            return super(LockedSet, self).__contains__(elem)
def arr_close():
    """Demonstrate np.isclose / np.allclose tolerance behaviour.

    Fixed for Python 3 (the original used ``print`` statements) and the
    two ``allclose`` results are returned so the demo is testable.

    :return: tuple ``(allclose_result_1, allclose_result_2)`` ->
        ``(False, True)``
    """
    ar1 = np.array([[1, 2], [3, 4]])
    ar2 = np.array([[1.1, 2.1], [3.1, 4.1]])
    ar3 = np.array([[1.00001, 2.00001], [3.00001, 4.00001]])
    ar4 = np.array([[1.0001, 2.0001], [3.0001, 4.0001]])

    print(np.isclose(ar1, ar2))
    # all False (0.1 exceeds the default rtol=1e-5, atol=1e-8)
    print(np.isclose(ar1, ar3))
    # all True (1e-5 is within the default tolerances)
    print(np.isclose(ar1, ar4))
    # all False
    print(np.isclose(ar1, ar4, atol=1.e-4))
    # all True once the absolute tolerance is widened

    close_1 = bool(np.allclose([1e10, 1e-7], [1.00001e10, 1e-8]))  # False
    close_2 = bool(np.allclose([1e10, 1e-8], [1.00001e10, 1e-9]))  # True
    print(close_1)
    print(close_2)
    return close_1, close_2
26 | x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5 27 | y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5 28 | xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h)) 29 | Z = logreg.predict(np.c_[xx.ravel(), yy.ravel()]) 30 | 31 | # Put the result into a color plot 32 | Z = Z.reshape(xx.shape) 33 | plt.figure(1, figsize=(4, 3)) 34 | plt.pcolormesh(xx, yy, Z, cmap=plt.cm.Paired) 35 | 36 | # Plot also the training points 37 | plt.scatter(X[:, 0], X[:, 1], c=Y, edgecolors='k', cmap=plt.cm.Paired) 38 | plt.xlabel('Sepal length') 39 | plt.ylabel('Sepal width') 40 | 41 | plt.xlim(xx.min(), xx.max()) 42 | plt.ylim(yy.min(), yy.max()) 43 | plt.xticks(()) 44 | plt.yticks(()) 45 | 46 | plt.show() 47 | -------------------------------------------------------------------------------- /python_utils/py_basic/dw_img_from_google.py: -------------------------------------------------------------------------------- 1 | from google_images_download import google_images_download 2 | 3 | chrome_driver_path = "C:/Program Files (x86)/Google/Chrome/Application/chromedriver.exe" 4 | out_put_path = "E:/face_rec/yy_face_demand/cartoon_sample/" 5 | out_put_path = "E:/face_rec/short_vedio_famous_people/people_lst/" 6 | out_put_path = "E:/people_detection/test_datasets/" 7 | 8 | 9 | def dw(s_keyword): 10 | """ 11 | :param s_keyword: like "pet cat images, pet dog images" 12 | :return: None 13 | """ 14 | # class instantiation 15 | response = google_images_download.googleimagesdownload() 16 | 17 | # creating list of arguments 18 | arguments = {"keywords": s_keyword, 19 | "limit": 200, "print_urls": True, 20 | "output_directory": out_put_path, 21 | "chromedriver": chrome_driver_path} 22 | 23 | # passing the arguments to the function 24 | paths = response.download(arguments) 25 | 26 | # printing absolute paths of the downloaded images 27 | print(paths) 28 | 29 | 30 | def do_dw(): 31 | lst_keywords = ["pedestrian images" 32 | ] 33 | dw(lst_keywords[0]) 34 | pass 35 | 36 
def basic_non_block():
    """Connect with a non-blocking socket and poll until connected.

    Bug fixes:
    - the host string contained a URL scheme
      (``'http://vis-www.cs.umass.edu'``), which is not a hostname and
      made every connect attempt fail with a DNS error — ``connect``
      wants a bare hostname;
    - the inner bare ``except:`` now catches only ``socket.error``;
    - prints are Python-3 compatible.

    Note: a non-blocking connect normally raises immediately
    (EINPROGRESS/EWOULDBLOCK), after which the loop busy-polls
    ``getpeername`` until the handshake completes — demo code only.
    """
    s = socket.socket()
    s.setblocking(0)

    try:
        s.connect(('vis-www.cs.umass.edu', 80))
    except socket.error as e:
        print(str(e))
        i = 0
        while True:
            try:
                print("We are connected to %s:%d" % s.getpeername())
                break
            except socket.error:
                print("Let's do some math while waiting: %d" % i)
                i += 1
    else:
        print("We are connected to %s:%d" % s.getpeername())
def rename_columns():
    """Demonstrate three ways to rename DataFrame columns.

    Fixed for Python 3 (``print`` statements) and the final DataFrame
    is returned so callers/tests can inspect the result.

    :return: the demo DataFrame after all renames (columns a, b, C)
    """
    df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]})
    print(df)
    #    A  B  C
    # 0  1  4  7
    # 1  2  5  8
    # 2  3  6  9

    # 1) rename a single column in place via a mapping
    df.rename(columns={"B": "b"}, inplace=True)
    print(df)
    #    A  b  C

    # 2) wholesale replacement of the columns Index
    df.columns = list('abc')
    print(df)
    #    a  b  c

    # 3) mutate the Index's underlying values array directly
    #    (a hack — bypasses the Index immutability contract)
    df.columns.values[2] = 'C'
    print(df)
    #    a  b  C
    return df
example arr_prob like [0.5, 0.5] 13 | return -1 * 0.5 *log0.5 + -1 * 0.5 * log0.5 = 1 14 | :param arr_prob: one dimension probability array 15 | :return: entropy 16 | """ 17 | # -1 * sum(Pi * logPi) 18 | return np.sum(-1 * np.log2(arr_prob) * arr_prob) 19 | 20 | def _cal_conditional_entropy(self, X, Y): 21 | """ 22 | calculate conditional entropy H(D|A) 23 | :return: 24 | """ 25 | pass 26 | 27 | def _cal_class_entropy(self, y): 28 | """ 29 | calculate data set entropy 30 | 31 | :param y: 32 | :return: 33 | """ 34 | num = len(y) 35 | print num # 15 36 | unique_class, counter = np.unique(y, return_counts=True) 37 | print unique_class, counter 38 | # [0 1] [6 9] 39 | # calculate each class probability 40 | class_prob = [c * 1.0 / num for c in counter] 41 | print class_prob 42 | # [0.40000000000000002, 0.59999999999999998] 43 | print self._cal_entropy(class_prob) 44 | # 0.970950594455 45 | 46 | 47 | def test_cal_class_entropy(): 48 | from create_data import get_loan_data_lh 49 | X, Y = get_loan_data_lh() 50 | dt = DTree() 51 | dt._cal_class_entropy(Y) 52 | 53 | if __name__ == '__main__': 54 | test_cal_class_entropy() 55 | # dt = DTree() 56 | 57 | pass 58 | -------------------------------------------------------------------------------- /python_utils/machine_learn/knearest/knn_scratch.py: -------------------------------------------------------------------------------- 1 | # _*_ coding:utf-8 _*_ 2 | """ 3 | implement knn from scratch 4 | """ 5 | from collections import Counter 6 | import numpy as np 7 | 8 | 9 | class KnnScratch(object): 10 | 11 | def fit(self, x_train, y_train): 12 | self.x_train = x_train 13 | self.y_train = y_train 14 | 15 | def predict_once(self, x_test, k): 16 | lst_distance = [] 17 | lst_predict = [] 18 | 19 | for i in xrange(len(self.x_train)): 20 | # euclidean distance 21 | distance = np.linalg.norm(x_test - self.x_train[i, :]) 22 | # distance = np.sqrt(np.sum(np.square(x_test, x_train[i, :]))) 23 | lst_distance.append([distance, i]) 24 | 25 | 
def get_pool(b_dummy=True, num=4):
    """
    Build a worker pool of the requested flavour.

    :param b_dummy: True -> thread pool (multiprocessing.dummy),
                    False -> real process pool
    :param num: number of workers in the pool
    :return: pool object
    """
    pool_cls = ThreadPool if b_dummy else ProcessPool
    return pool_cls(num)
def tb_partition_sql(d1=datetime.date(2018, 12, 1), d2=datetime.date(2018, 12, 31)):
    """
    Generate MySQL daily-partition clauses for the closed date range [d1, d2].

    Each day gets one partition named pYYYYMMDD whose upper bound is the
    *next* day, e.g. for the default range the first line is
    PARTITION p20181201 VALUES LESS THAN (TO_DAYS('2018-12-02')) ENGINE = InnoDB,
    and the last is
    PARTITION p20181231 VALUES LESS THAN (TO_DAYS('2019-01-01')) ENGINE = InnoDB,

    :param d1: first day to partition (inclusive)
    :param d2: last day to partition (inclusive)
    :return: list of clause strings (also printed, one per line)
    """
    sql = """PARTITION p%s VALUES LESS THAN (TO_DAYS('%s')) ENGINE = InnoDB,"""
    # one extra day past d2 so the last partition still has an upper bound
    days = [d1 + datetime.timedelta(days=x) for x in range((d2 - d1).days + 2)]

    lines = []
    for cur_day, next_day in zip(days, days[1:]):
        # partition is named after its own day; the bound is the next day
        lines.append(sql % (cur_day.strftime('%Y%m%d'),
                            next_day.strftime('%Y-%m-%d')))

    for line in lines:
        print(line)
    return lines
class DbSubService(DbBase):
    """
    Concrete MySQL access helper built on DbBase.

    Relies on ``self.cursor`` and ``self.conn`` — presumably created by
    DbBase.__init__ from the db_config_file kwargs; verify against DbBase.
    """

    def __init__(self, **kwargs):
        # forward connection settings (e.g. db_config_file=...) to DbBase
        super(DbSubService, self).__init__(**kwargs)

    def query(self):
        # placeholder — not implemented yet
        pass

    def count(self, tb):
        """
        Count all rows of a table.

        NOTE(review): the table name is interpolated directly into the SQL
        string; `tb` must come from trusted code, never from user input
        (table names cannot be bound as query parameters).
        :param tb: table name
        :return: table rows count
        """
        query_sql = ' select count(*) from %s ' % tb
        self.cursor.execute(query_sql)
        res = self.cursor.fetchone()
        # debug output of the count value
        print res[0]
        return res[0]

    def get_liver_info(self, limit_start, limit_size):
        """
        Fetch one page of per-reporter audit-status counts.

        Groups tb_ms_mobile_report_test by reported_uid and pivots the
        audit_status values S01..S05 into columns via sum(condition).
        :param limit_start: LIMIT offset of the page
        :param limit_size: LIMIT row count of the page
        :return: list of result rows from the cursor
        """
        query = """select reported_uid,
        sum(audit_status='S01') as audit_status_S01,
        sum(audit_status='S02') as audit_status_S02,
        sum(audit_status='S03') as audit_status_S03,
        sum(audit_status='S04') as audit_status_S04,
        sum(audit_status='S05') as audit_status_S05,
        count(*) as audit_status_all from iboms.tb_ms_mobile_report_test
        group by reported_uid
        limit %s, %s""" % (limit_start, limit_size)

        self.cursor.execute(query)
        return [row for row in self.cursor]

    def bulk_update(self, lst):
        """
        batch updates
        [("new_value" , "3"),("new_value" , "6")]
        Each tuple is (Name, Id); executemany binds them into the UPDATE,
        then the transaction is committed once for the whole batch.
        :param lst: list of (name, id) parameter tuples
        :return:
        """
        query = """UPDATE Writers SET Name = %s WHERE Id = %s"""
        self.cursor.executemany(query, lst)
        self.conn.commit()
def arr_sum():
    """
    Demo of numpy axis-wise reductions, boolean masks and reversed views.

    Fix: the original used Python-2-only print statements; every print here
    takes a single argument, so the function form behaves identically on
    Python 2 and Python 3.
    """
    arr = np.arange(6).reshape((2, 3))
    print(arr)
    # [[0 1 2]
    #  [3 4 5]]
    print(arr.sum(axis=0))    # column sums (axis=0 collapses rows)
    # [3 5 7]
    print(arr.sum(axis=1))    # row sums
    # [ 3 12]

    print(arr > 1)            # element-wise boolean mask
    # [[False False  True]
    #  [ True  True  True]]
    print(arr[::-1])          # rows in reverse order (a view, no copy)
    # [[3 4 5]
    #  [0 1 2]]
-------------------------------------------------------------------------------- 1 | # _*_ coding:utf-8 _*_ 2 | """ 3 | python operator module usage, cited from 4 | https://docs.python.org/2/library/operator.html 5 | """ 6 | 7 | import operator 8 | 9 | 10 | def cmp_fun(): 11 | a, b = 5, 3 12 | print operator.le(a, b) 13 | # False 14 | print operator.gt(a, b) 15 | # True 16 | 17 | 18 | def lst_ope(): 19 | lst = [1, 2, 3] 20 | print operator.indexOf(lst, 2) 21 | # 1 22 | lst1 = [1, 2, 3, 2] 23 | print operator.countOf(lst1, 2) 24 | # 2 25 | 26 | 27 | def cal_ope(): 28 | lst1 = [0, 1, 2, 3] 29 | lst2 = [10, 20, 30, 40] 30 | print map(operator.mul, lst1, lst2) 31 | # [0, 20, 60, 120] 32 | 33 | print sum(map(operator.mul, lst1, lst2)) 34 | # 200 35 | 36 | a, b = 1, 3 37 | print operator.iadd(a, b) 38 | # 4 39 | 40 | 41 | def item_ope(): 42 | s = ['h', 'e', 'l', 'l', 'o'] 43 | print operator.getitem(s, 1) 44 | # e 45 | print operator.itemgetter(1, 4)(s) 46 | # ('e', 'o') 47 | 48 | inventory = [('apple', 3), ('banana', 2), ('pear', 5), ('orange', 1)] 49 | get_count = operator.itemgetter(1) 50 | print map(get_count, inventory) 51 | # [3, 2, 5, 1] 52 | 53 | print sorted(inventory, key=get_count) 54 | # [('orange', 1), ('banana', 2), ('apple', 3), ('pear', 5)] 55 | 56 | 57 | def reduce_ope(): 58 | a = [2, 3, 4, 5] 59 | print reduce(lambda x, y: x + y, a) 60 | # 14 61 | print reduce(operator.add, a) 62 | # 14 63 | 64 | lst = [3, 2, 3] 65 | print reduce(operator.xor, lst) 66 | # 2 67 | 68 | # use reduce with init value, sum from init 69 | lst = [1, 2, 3] 70 | print reduce(operator.add, lst, 10) 71 | # 16 72 | 73 | 74 | if __name__ == '__main__': 75 | reduce_ope() 76 | # item_ope() 77 | # cal_ope() 78 | # lst_ope() 79 | # cmp_fun() 80 | pass 81 | -------------------------------------------------------------------------------- /python_utils/machine_learn/dataset/cluster/cluster_txt: -------------------------------------------------------------------------------- 1 | 1.658985 
4.285136 2 | -3.453687 3.424321 3 | 4.838138 -1.151539 4 | -5.379713 -3.362104 5 | 0.972564 2.924086 6 | -3.567919 1.531611 7 | 0.450614 -3.302219 8 | -3.487105 -1.724432 9 | 2.668759 1.594842 10 | -3.156485 3.191137 11 | 3.165506 -3.999838 12 | -2.786837 -3.099354 13 | 4.208187 2.984927 14 | -2.123337 2.943366 15 | 0.704199 -0.479481 16 | -0.392370 -3.963704 17 | 2.831667 1.574018 18 | -0.790153 3.343144 19 | 2.943496 -3.357075 20 | -3.195883 -2.283926 21 | 2.336445 2.875106 22 | -1.786345 2.554248 23 | 2.190101 -1.906020 24 | -3.403367 -2.778288 25 | 1.778124 3.880832 26 | -1.688346 2.230267 27 | 2.592976 -2.054368 28 | -4.007257 -3.207066 29 | 2.257734 3.387564 30 | -2.679011 0.785119 31 | 0.939512 -4.023563 32 | -3.674424 -2.261084 33 | 2.046259 2.735279 34 | -3.189470 1.780269 35 | 4.372646 -0.822248 36 | -2.579316 -3.497576 37 | 1.889034 5.190400 38 | -0.798747 2.185588 39 | 2.836520 -2.658556 40 | -3.837877 -3.253815 41 | 2.096701 3.886007 42 | -2.709034 2.923887 43 | 3.367037 -3.184789 44 | -2.121479 -4.232586 45 | 2.329546 3.179764 46 | -3.284816 3.273099 47 | 3.091414 -3.815232 48 | -3.762093 -2.432191 49 | 3.542056 2.778832 50 | -1.736822 4.241041 51 | 2.127073 -2.983680 52 | -4.323818 -3.938116 53 | 3.792121 5.135768 54 | -4.786473 3.358547 55 | 2.624081 -3.260715 56 | -4.009299 -2.978115 57 | 2.493525 1.963710 58 | -2.513661 2.642162 59 | 1.864375 -3.176309 60 | -3.171184 -3.572452 61 | 2.894220 2.489128 62 | -2.562539 2.884438 63 | 3.491078 -3.947487 64 | -2.565729 -2.012114 65 | 3.332948 3.983102 66 | -1.616805 3.573188 67 | 2.280615 -2.559444 68 | -2.651229 -3.103198 69 | 2.321395 3.154987 70 | -1.685703 2.939697 71 | 3.031012 -3.620252 72 | -4.599622 -2.185829 73 | 4.196223 1.126677 74 | -2.133863 3.093686 75 | 4.668892 -2.562705 76 | -2.793241 -2.149706 77 | 2.884105 3.043438 78 | -2.967647 2.848696 79 | 4.479332 -1.764772 80 | -4.905566 -2.911070 -------------------------------------------------------------------------------- 
def mean():
    """
    Demo of numpy mean along different axes.

    Fix: ported Python-2-only print statements to the (single-argument,
    version-agnostic) function form, and the per-column mean is now
    returned so callers/tests can use it instead of only reading stdout.

    :return: 1-D array of column means (axis=0) of the demo matrix
    """
    arr = np.arange(12).reshape((3, 4))
    print(arr)
    # [[ 0  1  2  3]
    #  [ 4  5  6  7]
    #  [ 8  9 10 11]]
    mean_arr = np.mean(arr, axis=0)
    print(mean_arr)
    # [ 4.  5.  6.  7.]

    # broadcasting: the 1-D column mean is subtracted from every row
    print(arr - mean_arr)
    # [[-4. -4. -4. -4.]
    #  [ 0.  0.  0.  0.]
    #  [ 4.  4.  4.  4.]]
    print(np.mean(arr))          # grand mean of all elements -> 5.5
    print(np.mean(arr, axis=1))  # per-row mean -> [ 1.5  5.5  9.5]
    return mean_arr
def log2_test():
    """
    Demo of np.log2, including log2(0) -> -inf, and a weighted log sum.

    Fixes: Python-2-only print statements ported to the function form;
    the expected divide-by-zero warning from log2(0) is silenced; the
    misleading "calculate entropy" comment is corrected.

    :return: sum(arr * log2(arr)) for the second demo array (34.0)
    """
    arr = np.array([0, 1, 2, 3, 2 ** 4])
    # log2(0) is -inf by definition; suppress the expected divide warning
    with np.errstate(divide='ignore'):
        print(np.log2(arr))
    # [-inf, 0, 1, 1.5849625, 4]

    arr = np.array([1, 2, 4, 2 ** 3])
    arr_lg = np.log2(arr)
    print(arr_lg)
    # [ 0.  1.  2.  3.]
    print(arr_lg * arr)
    # [  0.   2.   8.  24.]

    # NOTE(review): this is sum(x * log2(x)) — the *unnormalized* building
    # block of an entropy computation; entropy proper is -sum(p * log2(p))
    # over probabilities p (see cal_entropy below).
    total = np.sum(arr_lg * arr)
    print(total)
    # 34.0
    return total
class SoftmaxLayer:
    """Numerically stable softmax layer; caches its output for backward."""

    def __init__(self, name='Softmax'):
        # stateless apart from the cached forward output; the name
        # argument is accepted for API symmetry and not used
        pass

    def forward(self, in_data):
        """Row-wise softmax of in_data (one sample per row)."""
        # subtract each row's max first so exp() cannot overflow
        shifted = in_data - np.max(in_data, axis=1, keepdims=True)
        exp_scores = np.exp(shifted)
        self.top_val = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
        return self.top_val

    def backward(self, residual):
        """Gradient of softmax + cross-entropy w.r.t. the input scores.

        residual holds the true class index of each sample (evidenced by
        the fancy indexing below); the combined gradient is
        (p - one_hot(y)) averaged over the batch.
        """
        n_samples = residual.shape[0]
        grad = self.top_val.copy()
        grad[range(n_samples), list(residual)] -= 1
        grad /= n_samples
        return grad
def visualize_tree(tree, feature_name, dot_file):
    """Create a tree png using graphviz.

    tree -- fitted scikit-learn DecisionTree estimator.
    feature_name -- list of feature names.
    dot_file -- dot file name and path; the png is written next to it.
    """
    # BUG FIX: the original hard-coded open("tree.dot", ...), silently
    # ignoring dot_file — so loan_demo wrote tree.dot but converted the
    # (stale) loan.dot. Write to the requested path instead.
    with open(dot_file, 'w') as f:
        export_graphviz(tree, out_file=f,
                        feature_names=feature_name)

    # replace only the extension; str.replace('dot', 'png') would rewrite
    # every 'dot' occurrence anywhere in the path
    if dot_file.endswith('.dot'):
        dt_png = dot_file[:-4] + '.png'
    else:
        dt_png = dot_file + '.png'
    command = ["dot", "-Tpng", dot_file, "-o", dt_png]
    try:
        subprocess.check_call(command)
    except Exception as e:
        print(e)
        exit("Could not run dot, ie graphviz, to "
             "produce visualization")
def make_df(cols, idx):
    """
    Build a demo DataFrame whose cell (i, c) holds the string c + str(i).

    make_df('ABC', range(3)) returns
         A   B   C
    0   A0  B0  C0
    1   A1  B1  C1
    2   A2  B2  C2
    """
    data = {}
    for col in cols:
        data[col] = ['%s%s' % (col, i) for i in idx]

    return pd.DataFrame(data, idx)
def baseline_model():
    """
    Build and compile a small CNN for 10-class digit classification.

    Topology: Conv(32 filters, 5x5) -> MaxPool 2x2 -> Dropout 0.2
              -> Flatten -> Dense 128 relu -> Dense 10 softmax.
    The (1, 28, 28) input shape is channels-first, matching
    K.set_image_dim_ordering('th') at module import.
    :return: compiled keras Sequential model
    """
    model = Sequential()
    # 32 filters of 5x5 over a single-channel 28x28 image
    model.add(Conv2D(32, (5, 5), input_shape=(1, 28, 28), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    # drop 20% of activations during training to reduce overfitting
    model.add(Dropout(0.2))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    # 10 output units, softmax -> class probabilities
    model.add(Dense(10, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model
def coin_question(coin_val, coin_count, money):
    """
    Greedy change-making with limited coin supplies.

    Walks denominations from largest to smallest, taking as many of each
    coin as the remaining amount and the supply allow.

    :param coin_val: coin denominations in ascending order
    :param coin_count: available count of each denomination (parallel list)
    :param money: target amount
    :return: dict {denomination: coins used}; greedy, so the result may
             sum to less than `money` if the supply runs out — callers
             should verify the total when exact change is required
    """
    total = 0  # renamed from `sum`, which shadowed the builtin
    d_val_count = {}

    for i in reversed(range(len(coin_val))):
        remaining = money - total
        # BUG FIX: floor division — plain `/` is float division on
        # Python 3 and produced fractional coin counts
        n = min(coin_count[i], remaining // coin_val[i])
        if n:
            total += n * coin_val[i]
            d_val_count[coin_val[i]] = n

    return d_val_count
import GridSearchCV 7 | from sklearn.metrics import classification_report 8 | 9 | import matplotlib.pyplot as plt 10 | import seaborn as sns 11 | sns.set() 12 | 13 | 14 | def test_grid_search_cv(): 15 | iris = datasets.load_iris() 16 | parameters = {'kernel': ('linear', 'rbf'), 17 | 'C': [1, 2, 4], 'gamma': [0.125, 0.25, 0.5, 1, 2, 4]} 18 | 19 | svr = svm.SVC() 20 | clf = GridSearchCV(svr, parameters, n_jobs=-1) 21 | clf.fit(iris.data, iris.target) 22 | cv_result = pd.DataFrame.from_dict(clf.cv_results_) 23 | with open('cv_result.csv', 'w') as f: 24 | cv_result.to_csv(f) 25 | 26 | print('The parameters of the best model are: ') 27 | print(clf.best_params_) 28 | 29 | y_pred = clf.predict(iris.data) 30 | print(classification_report(y_true=iris.target, y_pred=y_pred)) 31 | 32 | 33 | def grid_search_cv_graph(): 34 | iris = datasets.load_digits() 35 | X = iris.data 36 | Y = iris.target 37 | 38 | C_lst = [1, 10, 100, 1000] 39 | gamma_lst = [0.125, 0.25, 0.5, 1, 2, 4] 40 | gamma_lst = [1e-3, 1e-4] 41 | 42 | parameters = {'C': C_lst, 'gamma': gamma_lst} 43 | 44 | # parameters = {'kernel': ('linear', 'rbf'), 45 | # 'C': C_lst, 'gamma': gamma_lst} 46 | 47 | clf_ = svm.SVC() 48 | clf = GridSearchCV(clf_, parameters, cv=2, n_jobs=-1) 49 | clf.fit(X, Y) 50 | 51 | print clf.best_params_ 52 | print clf.best_score_ 53 | 54 | print clf.cv_results_ 55 | 56 | # scores = [x[1] for x in clf.grid_scores_] 57 | scores = clf.cv_results_['mean_test_score'] 58 | print scores 59 | scores = np.array(scores).reshape(len(C_lst), len(gamma_lst)) 60 | 61 | for ind, i in enumerate(C_lst): 62 | plt.plot(gamma_lst, scores[ind], label='C: ' + str(i)) 63 | 64 | plt.legend() 65 | plt.xlabel('Gamma') 66 | plt.ylabel('Mean score') 67 | plt.show() 68 | 69 | if __name__ == '__main__': 70 | # test_grid_search_cv() 71 | grid_search_cv_graph() 72 | pass 73 | -------------------------------------------------------------------------------- /python_utils/http_basic/http_client_get.py: 
-------------------------------------------------------------------------------- 1 | # _*_ coding:utf-8 _*_ 2 | 3 | """ 4 | This file a sample demo to do http stress test 5 | """ 6 | import requests 7 | import time 8 | from multiprocessing.dummy import Pool as ThreadPool 9 | import urllib 10 | 11 | 12 | def get_ret_from_http(url): 13 | """cited from https://stackoverflow.com/questions/645312/what-is-the-quickest-way-to-http-get-in-python 14 | """ 15 | ret = requests.get(url) 16 | print ret.content 17 | # eg. result: {"error":false,"resultMap":{"check_ret":1},"success":true} 18 | 19 | 20 | def multi_process_stress_test(): 21 | """ 22 | start up 4 thread to issue 1000 http requests to server 23 | and test consume time 24 | :return: 25 | """ 26 | start = time.time() 27 | url = """http://127.0.0.1:9325/shortvideo/checkBlack?url=http%3A%2F%2Fzbasecapture.bs2.yy.com%2F42269159_1499248536403_3.jpg&serial=abcdddddddd""" 28 | url1 = """http://127.0.0.1:9325/shortvideo/checkBlack?url=http%3A%2F%2Fgenie.bs2dl.yy.com%2Ff4955aa1ab1c479256e2a2c5cdec73a6&serial=abceeeeeeee""" 29 | lst_url = [url, url1]*50 30 | pool = ThreadPool(5) 31 | ret = pool.map(get_ret_from_http, lst_url) 32 | pool.close() 33 | pool.join() 34 | print 'time consume %s' % (time.time() - start) 35 | 36 | 37 | def make_url(): 38 | """ 39 | generate url with parameter 40 | https://xy.com/index.php?url=http%3A//xy.xxx.com/22.jpg&SecretId=xy_123_move 41 | cited from https://stackoverflow.com/questions/2506379/add-params-to-given-url-in-python 42 | https://github.com/gruns/furl a good util for url operator 43 | :return: 44 | """ 45 | para = {"SecretId": "xy_123_move", "url": "http://xy.xxx.com/22.jpg"} 46 | 47 | print urllib.urlencode(para) 48 | # url=http%3A%2F%2Fxy.xxx.com%2F22.jpg&SecretId=xy_123_move 49 | 50 | base_url = 'xy.com/index.php' 51 | 52 | # 记得 下面的是 ? 
class Worker(threading.Thread):
    """Daemon thread that forever pulls (func, args, kwargs) work items
    off a shared Queue and executes them.

    The thread starts itself from __init__ and never leaves its run()
    loop; it terminates only because it is a daemon thread and dies with
    the process.
    """

    def __init__(self, task):
        # `task` is the shared Queue of pending work items.
        super(Worker, self).__init__()
        self.task = task
        self.daemon = True  # if don't set that, then the thread won't stop automatically
        self.start()

    def run(self):
        # Consume tasks forever; Queue.get() blocks while the queue is empty.
        while True:
            logging.debug('waiting for queue')
            func, args, kargs = self.task.get()
            try:
                logging.debug('now I am going to do task')
                func(*args, **kargs)
            except Exception, e:
                # Swallow the error so one bad task does not kill the
                # worker, but log it for visibility (Python 2 syntax).
                logging.warn(e)
            finally:
                # Always mark the task done so Queue.join() can unblock.
                self.task.task_done()
def idx_align_series():
    """Demo: pandas Series arithmetic aligns on the index; labels present
    in only one operand produce NaN unless a fill_value is supplied."""
    A = pd.Series([2, 4, 6], index=[0, 1, 2])
    B = pd.Series([1, 3, 5], index=[1, 2, 3])

    # only labels 1 and 2 exist in both -> labels 0 and 3 become NaN
    print A + B
    # 0    NaN
    # 1    5.0
    # 2    9.0
    # 3    NaN
    # dtype: float64

    # treat a missing label as 0 instead of propagating NaN
    print A.add(B, fill_value=0)
    # 0    2.0
    # 1    5.0
    # 2    9.0
    # 3    5.0
    # dtype: float64
[ 1 1 -4 3]] 75 | df = pd.DataFrame(arr, columns=list('QRST')) 76 | print df - df.iloc[0] 77 | # Q R S T 78 | # 0 0 0 0 0 79 | # 1 0 6 -5 2 80 | # 2 1 1 -4 3 81 | print df.subtract(df['R'], axis=0) 82 | # Q R S T 83 | # 0 3 0 4 1 84 | # 1 -3 0 -7 -3 85 | # 2 3 0 -1 3 86 | 87 | print df # no change 88 | 89 | if __name__ == '__main__': 90 | row_col_ope() 91 | # idx_align_df() 92 | # idx_align_series() 93 | pass 94 | -------------------------------------------------------------------------------- /python_utils/machine_learn/knearest/knn_classify_sklearn.py: -------------------------------------------------------------------------------- 1 | # _*_ coding:utf-8 _*_ 2 | 3 | """ 4 | use sklearn to do knn prediction 5 | 6 | data set from: https://archive.ics.uci.edu/ml/datasets/Iris 7 | mainly cited from the below blog: 8 | https://kevinzakka.github.io/2016/07/13/k-nearest-neighbor/ 9 | """ 10 | 11 | import numpy as np 12 | from sklearn.metrics import accuracy_score 13 | from sklearn.neighbors import KNeighborsClassifier 14 | from sklearn.model_selection import train_test_split, cross_val_score 15 | import pandas as pd 16 | import matplotlib.pyplot as plt 17 | 18 | 19 | def load_data(): 20 | names = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'class'] 21 | # loading training data 22 | path = '../dataset/knn/iris_data.txt' 23 | df = pd.read_csv(path, header=None, names=names) 24 | # print df.head() 25 | x = np.array(df.ix[:, 0: 4]) 26 | y = np.array(df['class']) 27 | 28 | print x.shape, y.shape 29 | # x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.33, random_state=40) 30 | return train_test_split(x, y, test_size=0.33, random_state=40) 31 | 32 | 33 | def predict(): 34 | x_train, x_test, y_train, y_test = load_data() 35 | k = 3 36 | knn = KNeighborsClassifier(n_neighbors=k) 37 | knn.fit(x_train, y_train) 38 | pred = knn.predict(x_test) 39 | print accuracy_score(y_test, pred) 40 | 41 | 42 | def cross_validation(): 43 | x_train, x_test, 
def img_base64():
    """Demo: read an image file and base64-encode its bytes, e.g. to embed
    the image directly in an HTTP request instead of passing a URL.

    NOTE(review): the hard-coded Windows path only exists on the author's
    machine.
    """
    img_path = 'F:/img_test/dl_img_text_recognition/online_1.jpg'
    with open(img_path, 'rb') as img_file:
        b64_str = base64.b64encode(img_file.read())
    print len(b64_str)
    # 55932
    print b64_str
    # /9j/4AAQSkZ.............
def base64_exception():
    """Demo: decoding a non-base64 string raises binascii.Error -- catch
    that specific exception rather than a bare Exception."""
    s_non_b64 = 'not base64 str 123 456 '
    try:
        print base64.decodestring(s_non_b64)
    # except Exception as e:
    except binascii.Error as e:
        # you'd better catch exception -- and the narrowest one possible
        print "base64 decode error %s " % e
def pd_dummy_val_2():
    """Demo: one-hot encode the `day` column with pd.get_dummies and join
    the indicator columns back onto the original frame."""
    raw_data = {"work_hour": [9, 9, 9, 9, 9, 9, 6],
                "day": ["mon", "tus", "wend", "thur", "fri", "sta", "sun"]}

    df = pd.DataFrame(raw_data, columns=['work_hour', 'day'])

    print df
    #    work_hour   day
    # 0          9   mon
    # 1          9   tus
    # 2          9  wend
    # 3          9  thur
    # 4          9   fri
    # 5          9   sta
    # 6          6   sun

    # one 0/1 indicator column per distinct value of `day`
    df_day = pd.get_dummies(df['day'])

    # column-wise concat: original frame + dummy columns
    df_new = pd.concat([df, df_day], axis=1)

    print df_new
    #    work_hour   day  fri  mon  sta  sun  thur  tus  wend
    # 0          9   mon  0.0  1.0  0.0  0.0   0.0  0.0   0.0
    # 1          9   tus  0.0  0.0  0.0  0.0   0.0  1.0   0.0
    # 2          9  wend  0.0  0.0  0.0  0.0   0.0  0.0   1.0
    # 3          9  thur  0.0  0.0  0.0  0.0   1.0  0.0   0.0
    # 4          9   fri  1.0  0.0  0.0  0.0   0.0  0.0   0.0
    # 5          9   sta  0.0  0.0  1.0  0.0   0.0  0.0   0.0
    # 6          6   sun  0.0  0.0  0.0  1.0   0.0  0.0   0.0
class RequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):
    """Serve GET requests by running them through a chain of case objects.

    Each case exposes test(handler) / act(handler); the first case whose
    test() returns True handles the request (chain-of-responsibility).
    """

    CasesLst = [CaseNoFile, CaseExistFile, CaseError]

    # NOTE(review): the original HTML markup of this template was lost in
    # extraction; reconstructed in the conventional form for this handler.
    # {path} and {msg} are substituted by .format() in handle_error().
    Error_Page = """\
<html>
<body>
<h1>Error accessing {path}</h1>
<p>{msg}</p>
</body>
</html>
"""

    def do_GET(self):
        """Resolve the request path and dispatch to the first matching case."""
        try:
            self.full_path = os.getcwd() + self.path
            for case in self.CasesLst:
                if case.test(self):
                    case.act(self)
                    break

        except Exception as msg:
            self.handle_error(msg)

    def handle_file(self, path):
        """Send the raw bytes of `path` back to the client."""
        try:
            with open(path, 'rb') as reader:
                content = reader.read()
            self.send_content(content)
        except IOError as msg:
            msg = "'{0}' cannot be read: {1}".format(self.path, msg)
            self.handle_error(msg)

    def handle_error(self, msg):
        """Render the error page for `msg`.

        BUG FIX: errors used to be sent with HTTP 200; report 404 instead
        so clients can distinguish failure from success.
        """
        content = self.Error_Page.format(path=self.path, msg=msg)
        self.send_content(content, status=404)

    def send_content(self, content, status=200):
        """Write `content` with the given HTTP status (default 200 keeps
        the original call sites working unchanged)."""
        self.send_response(status)
        self.send_header("Content-type", "text/html")
        self.send_header("Content-Length", str(len(content)))
        self.end_headers()
        self.wfile.write(content)
def assign_val():
    """Demo: NumPy slice assignment broadcasts a scalar, or copies a
    sequence of matching length, into the selected elements."""
    arr = np.arange(10)
    print arr
    # [0 1 2 3 4 5 6 7 8 9]
    # a scalar is broadcast across the whole slice
    arr[2:7] = 1
    print arr
    # [0 1 1 1 1 1 1 7 8 9]
    # a sequence of the same length is copied element-wise
    arr[2:7] = range(5)
    print arr
    # [0 1 0 1 2 3 4 7 8 9]
class CounterThreadSafe(threading.Thread):
    """Counter whose increments are serialized by a threading.Lock so it
    can be shared between threads.

    NOTE(review): the class inherits threading.Thread but is never
    start()ed by the callers below -- the base class is kept only to
    preserve the public interface.
    """

    def __init__(self, start=0):
        """:param start: initial counter value (default 0)"""
        super(CounterThreadSafe, self).__init__()
        self.val = start
        self.lock = threading.Lock()

    def inc(self, num):
        """Add `num` to the counter under the lock (explicit acquire/release)."""
        logging.debug('wanting for lock, before num is %s val is %s', num, self.val)
        # BUG FIX: acquire() used to sit *inside* the try block, so any
        # failure before the lock was actually taken would still run
        # release() in the finally clause -- raising on an unheld lock.
        self.lock.acquire()
        try:
            self.val += num
            logging.debug('after counter val is %s', self.val)
        finally:
            self.lock.release()

    def inc_v2(self, num):
        """Same as inc(), but uses the lock as a context manager."""
        logging.debug('wanting for lock, before num is %s val is %s', num, self.val)
        with self.lock:
            self.val += num
            logging.debug('after counter val is %s', self.val)
| -------------------------------------------------------------------------------- /python_utils/machine_learn/dataset/perception/dataset.txt: -------------------------------------------------------------------------------- 1 | -0.017612 14.053064 0 2 | -1.395634 4.662541 1 3 | -0.752157 6.538620 0 4 | -1.322371 7.152853 0 5 | 0.423363 11.054677 0 6 | 0.406704 7.067335 1 7 | 0.667394 12.741452 0 8 | -2.460150 6.866805 1 9 | 0.569411 9.548755 0 10 | -0.026632 10.427743 0 11 | 0.850433 6.920334 1 12 | 1.347183 13.175500 0 13 | 1.176813 3.167020 1 14 | -1.781871 9.097953 0 15 | -0.566606 5.749003 1 16 | 0.931635 1.589505 1 17 | -0.024205 6.151823 1 18 | -0.036453 2.690988 1 19 | -0.196949 0.444165 1 20 | 1.014459 5.754399 1 21 | 1.985298 3.230619 1 22 | -1.693453 -0.557540 1 23 | -0.576525 11.778922 0 24 | -0.346811 -1.678730 1 25 | -2.124484 2.672471 1 26 | 1.217916 9.597015 0 27 | -0.733928 9.098687 0 28 | -3.642001 -1.618087 1 29 | 0.315985 3.523953 1 30 | 1.416614 9.619232 0 31 | -0.386323 3.989286 1 32 | 0.556921 8.294984 1 33 | 1.224863 11.587360 0 34 | -1.347803 -2.406051 1 35 | 1.196604 4.951851 1 36 | 0.275221 9.543647 0 37 | 0.470575 9.332488 0 38 | -1.889567 9.542662 0 39 | -1.527893 12.150579 0 40 | -1.185247 11.309318 0 41 | -0.445678 3.297303 1 42 | 1.042222 6.105155 1 43 | -0.618787 10.320986 0 44 | 1.152083 0.548467 1 45 | 0.828534 2.676045 1 46 | -1.237728 10.549033 0 47 | -0.683565 -2.166125 1 48 | 0.229456 5.921938 1 49 | -0.959885 11.555336 0 50 | 0.492911 10.993324 0 51 | 0.184992 8.721488 0 52 | -0.355715 10.325976 0 53 | -0.397822 8.058397 0 54 | 0.824839 13.730343 0 55 | 1.507278 5.027866 1 56 | 0.099671 6.835839 1 57 | -0.344008 10.717485 0 58 | 1.785928 7.718645 1 59 | -0.918801 11.560217 0 60 | -0.364009 4.747300 1 61 | -0.841722 4.119083 1 62 | 0.490426 1.960539 1 63 | -0.007194 9.075792 0 64 | 0.356107 12.447863 0 65 | 0.342578 12.281162 0 66 | -0.810823 -1.466018 1 67 | 2.530777 6.476801 1 68 | 1.296683 11.607559 0 69 | 0.475487 12.040035 
0 70 | -0.783277 11.009725 0 71 | 0.074798 11.023650 0 72 | -1.337472 0.468339 1 73 | -0.102781 13.763651 0 74 | -0.147324 2.874846 1 75 | 0.518389 9.887035 0 76 | 1.015399 7.571882 0 77 | -1.658086 -0.027255 1 78 | 1.319944 2.171228 1 79 | 2.056216 5.019981 1 80 | -0.851633 4.375691 1 81 | -1.510047 6.061992 0 82 | -1.076637 -3.181888 1 83 | 1.821096 10.283990 0 84 | 3.010150 8.401766 1 85 | -1.099458 1.688274 1 86 | -0.834872 -1.733869 1 87 | -0.846637 3.849075 1 88 | 1.400102 12.628781 0 89 | 1.752842 5.468166 1 90 | 0.078557 0.059736 1 91 | 0.089392 -0.715300 1 92 | 1.825662 12.693808 0 93 | 0.197445 9.744638 0 94 | 0.126117 0.922311 1 95 | -0.679797 1.220530 1 96 | 0.677983 2.556666 1 97 | 0.761349 10.693862 0 98 | -2.168791 0.143632 1 99 | 1.388610 9.341997 0 100 | 0.317029 14.739025 0 -------------------------------------------------------------------------------- /python_utils/machine_learn/logistic_regression/lr_scratch.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | 3 | import numpy as np 4 | import pandas as pd 5 | 6 | 7 | class LogisticRegression(object): 8 | def __init__(self): 9 | self._map_method() 10 | pass 11 | 12 | def _map_method(self): 13 | self._do_train = {"gd": self._gd, "sgd": self._sgd} 14 | 15 | def _sigmoid(self, x): 16 | return 1.0 / (1 + np.exp(-x)) 17 | 18 | def fit(self, X, Y, **opt): 19 | m, n = X.shape 20 | self._weight = np.ones((n, 1)) 21 | max_iter = opt.get("max_iter", 100) 22 | alpha = opt.get("alpha", 0.01) 23 | method = opt.get("method", "sgd") 24 | 25 | for k in xrange(max_iter): 26 | try: 27 | self._do_train[method](X, Y, alpha) 28 | 29 | print "iter %s error rate %s" % (k, self._get_error_rate(X, Y)) 30 | except KeyError: 31 | raise ValueError('method error') 32 | 33 | def _sgd(self, X, Y, alpha): 34 | """stochastic gradient descent""" 35 | m, n = X.shape 36 | for i in xrange(m): 37 | # pred = self._sigmoid(X[i, :] * self._weight) 38 | pred = 
def inc(x):
    """Closure demo: build and return a function that adds `x` to its
    single argument."""
    def add_x(amount):
        return amount + x

    return add_x
def f(a, b, c, d):
    """Combine four decimal digits into one number, i.e.
    a*1000 + b*100 + c*10 + d.

    Used by partial_demo to illustrate functools.partial.
    :param a: int
    :param b: int
    :param c: int
    :param d: int
    :return: int
    """
    # Horner form of a*1000 + b*100 + c*10 + d
    return ((a * 10 + b) * 10 + c) * 10 + d
def print_keyword_args(**kwargs):
    """Print every keyword argument as 'name = value', one per line."""
    # kwargs is a dict of the keyword args passed to the function
    for key, value in kwargs.iteritems():
        print "%s = %s" % (key, value)
class ExampleClass:
    """Demonstrate the common ways of reading **kwargs, with and
    without default values."""

    def __init__(self, **kwargs):
        opts = kwargs
        # mandatory key: plain indexing raises KeyError when it is missing
        self.val = opts['val']
        # optional key: get() yields None when absent...
        self.val2 = opts.get('val2')
        # ...or an explicit fallback value
        self.val3 = opts.get('val3', 'default_val3')
        # pop() behaves like get()-with-default but also removes the key
        self.val4 = opts.pop('val4', 'default_val4')
""":return size num str(in 'A~z, 0-9') 16 | eg. size=6 return 'ad14df' 17 | [random.choice('abcde') for _ in range(3)] -> ['a', 'b', 'b'] 18 | ''.join(['a', 'b', 'b']) -> 'abb' 19 | """ 20 | return ''.join(random.choice(str_source) for _ in xrange(size)) 21 | 22 | 23 | def str_split(): 24 | s = 'python_worker_name&topSid_111_appid_111&topSid_222_appid_222' 25 | print s[0:s.find('&')] 26 | print s.split('&') 27 | print s.split('&')[1:] 28 | 29 | 30 | def remove_sub_str(): 31 | src = 'channel_1' 32 | sub_s = 'chan' 33 | print src[src.find(sub_s):] 34 | print src.find(sub_s) 35 | print src.replace(sub_s, '') 36 | 37 | 38 | def str_format_once(): 39 | query = """insert into {tb_name} (create_time, appid) VALUES (%s,%s)""" 40 | tb_name = 'tb_audio_rec_ret_2017_11' 41 | # query % tb_name error 42 | print query.format(tb_name=tb_name) 43 | # insert into tb_audio_rec_ret_2017_11 (create_time, appid) VALUES (%s,%s) 44 | 45 | 46 | def str_replace(): 47 | import time 48 | s = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) 49 | print s 50 | # 2017-11-09 17:26:34 51 | print s[0:7].replace('-', '_') 52 | # 2017_11 53 | 54 | 55 | def char_2int_2char(): 56 | print ord('a') 57 | # 97 58 | print chr(97) 59 | # a 60 | 61 | 62 | def fill_zero(): 63 | s_num = 11 64 | print str(s_num).zfill(5) 65 | # 00011 66 | 67 | 68 | if __name__ == '__main__': 69 | print fill_zero() 70 | # char_2int_2char() 71 | # str_replace() 72 | # str_format_once() 73 | # remove_sub_str() 74 | # str_split() 75 | # str_format() 76 | # print generator_random_str() 77 | # print generator_random_str(3, 'abc123') 78 | # s = '123' 79 | # if s.find("12") == -1: 80 | # print 'no no ' 81 | 82 | b = 0 83 | b = None 84 | # if b is not zero not None(like -1, 1) it will print 85 | if b: 86 | print '%s not zero' % b 87 | 88 | # url = 'bear fish.com' 89 | # if url.endswith('.com'): 90 | # url = url[:-4] 91 | # print url 92 | 93 | url = 'www.myzaker.com/article/58daf1b69490cbe53400001b/' 94 | # if 'aa' in url: 95 | # 
print '1' 96 | # elif 'comp' in url: 97 | # print '2' 98 | # else: 99 | # print '3' 100 | # print url.find('myzaker') 101 | # print url.find('www.myzaker') 102 | # print url.find('http') 103 | 104 | # print s[1:] 105 | # print s[:] 106 | # print s[:2] 107 | pass 108 | -------------------------------------------------------------------------------- /python_utils/py_basic/set_ope.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | python unique list -> set usage 4 | """ 5 | 6 | 7 | def define_set(): 8 | """2 ways of defining set""" 9 | set_1 = set([1, 2, 3]) 10 | print type(set_1) 11 | print set_1 12 | 13 | set_2 = {2, 3, 2} 14 | print type(set_2) 15 | # 16 | print set_2 17 | # set([2, 3]) 18 | 19 | a = set((1, 2, 3, 4)) 20 | b = set([3, 4, 5, 6]) 21 | print a | b # Union 22 | # {1, 2, 3, 4, 5, 6} 23 | print a & b # Intersection 24 | # {3, 4} 25 | print a < b # Subset 26 | # False 27 | print a - b # Difference 28 | # {1, 2} 29 | print a ^ b # Symmetric Difference 30 | # {1, 2, 5, 6} 31 | 32 | 33 | def set_basic_usage(): 34 | s1 = set() 35 | 36 | s1.add('abc') 37 | s1.add('abc') 38 | s1.add(123) 39 | s1.add(777) 40 | print (s1) 41 | 42 | if 123 in s1: 43 | print ' find it and remove it' 44 | s1.remove(123) 45 | print s1 46 | 47 | 48 | def dict_val_set(): 49 | dic_val_set = {} 50 | dic_val_set['abc'] = set([123]) 51 | dic_val_set['abc'].add(456) 52 | dic_val_set['abc'].add(123) 53 | print dic_val_set 54 | # {'abc': set([456, 123])} 55 | dic_val_set['ddd'] = set() 56 | dic_val_set['ddd'].add(123) 57 | 58 | for k in dic_val_set.keys(): 59 | if 123 in dic_val_set[k]: 60 | print dic_val_set[k] 61 | 62 | 63 | def set_remove(): 64 | # s_src = {1, 3, 5, 7} 65 | s_src = {1} 66 | # s2 = {1, 3, 2} 67 | s2 = [1, 3, 2] 68 | # raise error 69 | # print s_src.remove(*s2) 70 | try: 71 | s_src.remove(*s2) 72 | except Exception as e: 73 | print e 74 | # print s_src 75 | print s_src - s2 76 | 77 | # print s_src | s2 
78 | # set([1, 2, 3, 5, 7]) 79 | # print s_src & s2 80 | # set([1, 3]) 81 | 82 | 83 | def set_lst(): 84 | s1 = {1, 2, 3} 85 | lst_1 = [] 86 | # set to list 87 | lst_1 += s1 88 | 89 | print s1 90 | print lst_1 91 | 92 | 93 | def dict_key_to_set(): 94 | d = {'111': 1, 'aaa': 111} 95 | s1 = set(d.keys()) 96 | print s1 97 | # set(['111', 'aaa']) 98 | 99 | s2 = {'111', 111} 100 | print s1 & s2 101 | 102 | 103 | def set_diff(): 104 | s1 = {1, 3} 105 | s2 = {1, 2, 4} 106 | 107 | print s1 - s2 108 | print s1.difference(s2) 109 | # set([3]) 110 | 111 | 112 | def set_hash(): 113 | lst = [1, 555, 372, 6, 6, 372, 222] 114 | h_set = set(lst) 115 | print h_set # unordered 116 | # set([1, 555, 372, 222, 6]) 117 | 118 | if __name__ == '__main__': 119 | set_hash() 120 | # set_diff() 121 | # dict_key_to_set() 122 | # set_lst() 123 | # set_remove() 124 | # define_set() 125 | # dict_val_set() 126 | # set_basic_usage() 127 | # print min(3, 4, -1) 128 | # import time 129 | # import random 130 | # timestamp = int(time.time()) 131 | # print random.randint(0, 1000000) + timestamp 132 | pass 133 | -------------------------------------------------------------------------------- /python_utils/al_lt_common/al_str.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | 3 | 4 | def max_unique_substr(src): 5 | LEN = len(src) 6 | 7 | lst_ret = [] 8 | cur_max = 0 9 | s_tmp = '' 10 | for i in xrange(LEN): 11 | if src[i] not in s_tmp: 12 | s_tmp += src[i] 13 | cur_max += 1 14 | if len(lst_ret): 15 | if len(lst_ret[0]) < cur_max: 16 | lst_ret = [s_tmp] 17 | else: 18 | lst_ret.append(s_tmp) 19 | else: 20 | idx = s_tmp.find(src[i]) 21 | s_tmp = s_tmp[idx + 1:] + src[i] 22 | cur_max = len(s_tmp) 23 | if len(lst_ret[0]) == cur_max and s_tmp not in lst_ret: 24 | lst_ret.append(s_tmp) 25 | 26 | return lst_ret 27 | 28 | 29 | def max_unique_substr_len(src): 30 | char_last_idx = [-1] * 256 31 | b, e, cur_max, max_len = -1, -1, 0, 0 32 | 33 | for i in 
xrange(len(src)): 34 | char_idx = ord(src[i]) 35 | last_idx = char_last_idx[char_idx] 36 | if last_idx == -1 or last_idx > e or last_idx < b: 37 | char_last_idx[char_idx] = i 38 | e += 1 39 | cur_max += 1 40 | if cur_max > max_len: 41 | max_len = cur_max 42 | else: 43 | e = i 44 | b = last_idx + 1 45 | cur_max = e - b + 1 46 | char_last_idx[char_idx] = i 47 | 48 | return max_len 49 | 50 | 51 | def test_max_unique_substr(): 52 | s1 = 'abdefgabef' 53 | print max_unique_substr(s1) 54 | # ['abdefg', 'bdefga', 'defgab'] 55 | s1 = 'bbbb' 56 | print max_unique_substr(s1) 57 | # ['b'] 58 | s1 = 'geeksforgeeks' 59 | print max_unique_substr(s1) 60 | # ['eksforg', 'ksforge'] 61 | s1 = 'qwertqwer' 62 | print max_unique_substr(s1) 63 | 64 | s1 = 'abdefgabef' 65 | print max_unique_substr_len(s1) 66 | 67 | s1 = 'abcd' 68 | print max_unique_substr_len(s1) 69 | 70 | s1 = 'bbbb' 71 | print max_unique_substr_len(s1) 72 | 73 | s1 = 'geeksforgeeks' 74 | print max_unique_substr_len(s1) 75 | 76 | s1 = 'qwertqwer' 77 | print max_unique_substr_len(s1) 78 | 79 | 80 | def print_lst_str(lst_s): 81 | print "".join(lst_s) 82 | 83 | 84 | def str_permute(lst_s, b, e): 85 | if b == e: 86 | print_lst_str(lst_s) 87 | 88 | for i in xrange(b, e + 1): 89 | lst_s[b], lst_s[i] = lst_s[i], lst_s[b] 90 | # str_permute(lst_s, i + 1, e) 91 | str_permute(lst_s, b + 1, e) 92 | lst_s[i], lst_s[b] = lst_s[b], lst_s[i] 93 | 94 | 95 | def print_permutation_str(str): 96 | n, lst_s = len(str), list(str) 97 | str_permute(lst_s, 0, n - 1) 98 | 99 | 100 | def test_pps(): 101 | s = 'abc' 102 | print_permutation_str(s) 103 | s = 'abcd' 104 | print_permutation_str(s) 105 | 106 | 107 | if __name__ == '__main__': 108 | # test_pps() 109 | # test_max_unique_substr() 110 | 111 | print list('abc') 112 | # ['a', 'b', 'c'] 113 | pass 114 | -------------------------------------------------------------------------------- /python_utils/machine_learn/dataset/logistic_regression/lr_ml_action.txt: 
-------------------------------------------------------------------------------- 1 | -0.017612 14.053064 0 2 | -1.395634 4.662541 1 3 | -0.752157 6.538620 0 4 | -1.322371 7.152853 0 5 | 0.423363 11.054677 0 6 | 0.406704 7.067335 1 7 | 0.667394 12.741452 0 8 | -2.460150 6.866805 1 9 | 0.569411 9.548755 0 10 | -0.026632 10.427743 0 11 | 0.850433 6.920334 1 12 | 1.347183 13.175500 0 13 | 1.176813 3.167020 1 14 | -1.781871 9.097953 0 15 | -0.566606 5.749003 1 16 | 0.931635 1.589505 1 17 | -0.024205 6.151823 1 18 | -0.036453 2.690988 1 19 | -0.196949 0.444165 1 20 | 1.014459 5.754399 1 21 | 1.985298 3.230619 1 22 | -1.693453 -0.557540 1 23 | -0.576525 11.778922 0 24 | -0.346811 -1.678730 1 25 | -2.124484 2.672471 1 26 | 1.217916 9.597015 0 27 | -0.733928 9.098687 0 28 | -3.642001 -1.618087 1 29 | 0.315985 3.523953 1 30 | 1.416614 9.619232 0 31 | -0.386323 3.989286 1 32 | 0.556921 8.294984 1 33 | 1.224863 11.587360 0 34 | -1.347803 -2.406051 1 35 | 1.196604 4.951851 1 36 | 0.275221 9.543647 0 37 | 0.470575 9.332488 0 38 | -1.889567 9.542662 0 39 | -1.527893 12.150579 0 40 | -1.185247 11.309318 0 41 | -0.445678 3.297303 1 42 | 1.042222 6.105155 1 43 | -0.618787 10.320986 0 44 | 1.152083 0.548467 1 45 | 0.828534 2.676045 1 46 | -1.237728 10.549033 0 47 | -0.683565 -2.166125 1 48 | 0.229456 5.921938 1 49 | -0.959885 11.555336 0 50 | 0.492911 10.993324 0 51 | 0.184992 8.721488 0 52 | -0.355715 10.325976 0 53 | -0.397822 8.058397 0 54 | 0.824839 13.730343 0 55 | 1.507278 5.027866 1 56 | 0.099671 6.835839 1 57 | -0.344008 10.717485 0 58 | 1.785928 7.718645 1 59 | -0.918801 11.560217 0 60 | -0.364009 4.747300 1 61 | -0.841722 4.119083 1 62 | 0.490426 1.960539 1 63 | -0.007194 9.075792 0 64 | 0.356107 12.447863 0 65 | 0.342578 12.281162 0 66 | -0.810823 -1.466018 1 67 | 2.530777 6.476801 1 68 | 1.296683 11.607559 0 69 | 0.475487 12.040035 0 70 | -0.783277 11.009725 0 71 | 0.074798 11.023650 0 72 | -1.337472 0.468339 1 73 | -0.102781 13.763651 0 74 | -0.147324 2.874846 1 75 | 
0.518389 9.887035 0 76 | 1.015399 7.571882 0 77 | -1.658086 -0.027255 1 78 | 1.319944 2.171228 1 79 | 2.056216 5.019981 1 80 | -0.851633 4.375691 1 81 | -1.510047 6.061992 0 82 | -1.076637 -3.181888 1 83 | 1.821096 10.283990 0 84 | 3.010150 8.401766 1 85 | -1.099458 1.688274 1 86 | -0.834872 -1.733869 1 87 | -0.846637 3.849075 1 88 | 1.400102 12.628781 0 89 | 1.752842 5.468166 1 90 | 0.078557 0.059736 1 91 | 0.089392 -0.715300 1 92 | 1.825662 12.693808 0 93 | 0.197445 9.744638 0 94 | 0.126117 0.922311 1 95 | -0.679797 1.220530 1 96 | 0.677983 2.556666 1 97 | 0.761349 10.693862 0 98 | -2.168791 0.143632 1 99 | 1.388610 9.341997 0 100 | 0.317029 14.739025 0 -------------------------------------------------------------------------------- /python_utils/sk_sc_pd_operator/pd_date_time.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | 3 | import numpy as np 4 | import pandas as pd 5 | import matplotlib.pyplot as plt 6 | import seaborn 7 | seaborn.set() 8 | 9 | 10 | def np_dt(): 11 | date = np.array('2017-09-28', dtype=np.datetime64) 12 | print date + np.arange(5) 13 | # ['2017-09-28' '2017-09-29' '2017-09-30' '2017-10-01' '2017-10-02'] 14 | 15 | 16 | def series_dt(): 17 | index = pd.DatetimeIndex(['2014-07-04', '2014-08-04', 18 | '2015-07-04', '2015-08-04']) 19 | data = pd.Series([0, 1, 2, 3], index=index) 20 | print data 21 | # 2014-07-04 0 22 | # 2014-08-04 1 23 | # 2015-07-04 2 24 | # 2015-08-04 3 25 | # dtype: int64 26 | 27 | print data['2014-07-04':'2014-09-04'] 28 | # 2014-07-04 0 29 | # 2014-08-04 1 30 | # dtype: int64 31 | 32 | print data['2015'] 33 | # 2015-07-04 2 34 | # 2015-08-04 3 35 | # dtype: int64 36 | 37 | 38 | def pd_time(): 39 | from datetime import datetime 40 | dates = pd.to_datetime([datetime(2015, 7, 3), '4th of July, 2015', 41 | '2015-Jul-6', '07-07-2015', '20150708']) 42 | 43 | print dates 44 | 45 | print dates.to_period('D') 46 | # PeriodIndex(['2015-07-03', '2015-07-04', '2015-07-06', 
'2015-07-07', 47 | # '2015-07-08'], 48 | # dtype='int64', freq='D') 49 | print dates - dates[0] 50 | # TimedeltaIndex(['0 days', '1 days', '3 days', 51 | # '4 days', '5 days'], 52 | # dtype='timedelta64[ns]', freq=None) 53 | 54 | print pd.date_range('2015-07-03', '2015-07-5') 55 | # DatetimeIndex(['2015-07-03', '2015-07-04', '2015-07-05'], 56 | # dtype='datetime64[ns]', freq='D') 57 | print pd.date_range('2015-07-03', periods=3) 58 | # as above 59 | 60 | print pd.date_range('2015-07-03', periods=3, freq='H') 61 | # DatetimeIndex(['2015-07-03 00:00:00', '2015-07-03 01:00:00', 62 | # '2015-07-03 02:00:00'], 63 | # dtype='datetime64[ns]', freq='H') 64 | 65 | print pd.period_range('2015-07', periods=3, freq='M') 66 | # PeriodIndex(['2015-07', '2015-08', '2015-09'], 67 | # dtype='int64', freq='M') 68 | 69 | 70 | def pd_time_offset(): 71 | from pandas.tseries.offsets import BDay 72 | 73 | print pd.timedelta_range(0, periods=3, freq="2H30T") 74 | # TimedeltaIndex(['00:00:00', '02:30:00', '05:00:00'], 75 | # dtype='timedelta64[ns]', freq='150T') 76 | print pd.date_range('2015-07-01', periods=3, freq=BDay()) 77 | # DatetimeIndex(['2015-07-01', '2015-07-02', '2015-07-03'], 78 | # dtype='datetime64[ns]', freq='B') 79 | 80 | 81 | def pandas_datareader_1(): 82 | from pandas_datareader import data 83 | goog = data.DataReader('GOOG', start='2004', end='2016', 84 | data_source='google') 85 | 86 | print goog.head() 87 | 88 | goog = goog['Close'] 89 | goog.plot() 90 | 91 | 92 | if __name__ == '__main__': 93 | pandas_datareader_1() 94 | # pd_time_offset() 95 | # pd_time() 96 | # series_dt() 97 | # np_dt() 98 | pass 99 | -------------------------------------------------------------------------------- /python_utils/py_basic/time_ope.py: -------------------------------------------------------------------------------- 1 | # _*_ coding:utf-8 _*_ 2 | 3 | """ 4 | get the seconds since epoch from the time + date output 5 | function time consume 6 | 
https://stackoverflow.com/questions/5478351/python-time-measure-function 7 | """ 8 | 9 | import time 10 | import timeit 11 | 12 | 13 | def test_time_consume(): 14 | start = time.clock() 15 | time.sleep(1) 16 | print time.clock() - start 17 | # 0.999735009203 18 | 19 | 20 | def timing(f): 21 | def wrap(*args): 22 | start = time.time() 23 | ret = f(*args) 24 | end = time.time() 25 | print '%s function took %0.3f ms' % (f.func_name, (end - start) * 1000.0) 26 | return ret 27 | return wrap 28 | 29 | 30 | @timing 31 | def test_time(): 32 | time.sleep(1.1) 33 | # test_time function took 1101.000 ms 34 | 35 | 36 | def timeit_test(): 37 | timeit.timeit() 38 | 39 | 40 | def sleep_milliseconds(mi_sec=50): 41 | 42 | time.sleep(mi_sec / 1000.0) 43 | 44 | 45 | @timing 46 | def test_sp_mi_sec(): 47 | sleep_milliseconds() 48 | 49 | 50 | def test_time_transform(): 51 | # time_stamp = int(time.time()) 52 | # s = '123' 53 | # print time_stamp 54 | # # 1509953402 55 | # print "%s_%s.pcm" % (s, time_stamp) 56 | # # 123_1509953402.pcm 57 | # s += str(time_stamp) 58 | # print s 59 | 60 | # print time.ctime(time_stamp) 61 | # Thu Jun 28 07:58:58 2018 62 | print time.strftime("%Y-%m-%d %H:%M:%S") 63 | # 2018-06-28 08:00:35 64 | 65 | # print time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(1509953402)) 66 | # # 2017-11-06 15:30:02 67 | # 68 | # time_stamp = int(time.time()) 69 | # print time_stamp 70 | # print divmod(time_stamp, 3600) 71 | 72 | 73 | def str_time(): 74 | import datetime 75 | str_time = '2018-02-01 0:0:0' 76 | d = datetime.datetime.strptime(str_time, "%Y-%m-%d %H:%M:%S") 77 | print d, d.strftime("%Y-%m-%d %H:%M:%S") 78 | # 2018-02-01 00:00:00, 2018-02-01 00:00:00 79 | 80 | for i in xrange(2): 81 | print d, d + datetime.timedelta(minutes=30) 82 | d = d + datetime.timedelta(minutes=30) 83 | 84 | # 2018-02-01 00:00:00 2018-02-01 00:30:00 85 | # 2018-02-01 00:30:00 2018-02-01 01:00:00 86 | 87 | 88 | if __name__ == '__main__': 89 | # import datetime 90 | # str_time = 
'2018-03-08T08:00:00.000' 91 | # d = datetime.datetime.strptime(str_time, "%Y-%m-%dT%H:%M:%S.%f") 92 | # print d 93 | # 2018-03-08 08:00:00 94 | 95 | import time 96 | str_time = '2018-03-08T08:00:00.000' 97 | # str_time.replace('T', ' ') 98 | 99 | # d = time.strftime("%Y-%m-%d %H:%M:%S.%f") 100 | 101 | import time 102 | 103 | str_time = '2018-03-08T08:00:00.000' 104 | d = time.strptime(str_time, "%Y-%m-%dT%H:%M:%S.%f") 105 | 106 | print d 107 | # time.struct_time(tm_year=2018, tm_mon=3, tm_mday=8, tm_hour=8, tm_min=0, tm_sec=0, tm_wday=3, tm_yday=67, tm_isdst=-1) 108 | 109 | print time.strftime("%Y-%m-%d %H:%M:%S", d) 110 | # 2018-03-08 08:00:00 111 | 112 | # str_time() 113 | # test_time_consume() 114 | # test_time() 115 | # test_sp_mi_sec() 116 | # test_time_transform() 117 | pass 118 | -------------------------------------------------------------------------------- /python_utils/http_basic/url_ope.py: -------------------------------------------------------------------------------- 1 | # _*_ coding:utf-8 _*_ 2 | 3 | 4 | import urllib 5 | import requests 6 | import urlparse 7 | 8 | 9 | def url_quote(): 10 | raw_url = 'http://bs2-audiorec.oss-cn-shanghai.aliyuncs.com/e98f81fb526061e92a26eda17955c219.pcm' 11 | url = urllib.quote(raw_url) 12 | print url 13 | # http%3A//bs2-audiorec.oss-cn-shanghai.aliyuncs.com/e98f81fb526061e92a26eda17955c219.pcm 14 | print urllib.unquote(url) 15 | # http://bs2-audiorec.oss-cn-shanghai.aliyuncs.com/e98f81fb526061e92a26eda17955c219.pcm 16 | print urllib.quote("河=&源") 17 | # %E6%B2%B3%E6%BA%90 18 | 19 | 20 | def req_with_para(): 21 | d_para = {"name": "xy", "age": 21} 22 | print requests.get('http://xy.com', params=d_para) 23 | 24 | # ordered name-value pairs 25 | d_sorted_para = [("age", 21), ("name", "xy")] 26 | print requests.get('http://xy.com', params=d_sorted_para) 27 | 28 | 29 | def url_encode_v1(): 30 | f = {'eventName': 'myEvent', 'eventDescription': '飞龙在天'} 31 | print urllib.urlencode(f) 32 | # 
eventName=myEvent&eventDescription=%E9%A3%9E%E9%BE%99%E5%9C%A8%E5%A4%A9 33 | 34 | 35 | def url_encode_v2(): 36 | d_para = {"name": "xy熊大", "age": 21} 37 | print '&'.join('%s=%s' % (k, v) for k, v in d_para.iteritems()) 38 | # age=21&name=xy熊大 39 | print '&'.join('%s=%s' % (k, urllib.quote(str(v))) for k, v in d_para.iteritems()) 40 | # age=21&name=xy%E7%86%8A%E5%A4%A7 41 | 42 | base_url = 'xy.com/' 43 | url = 'http://%s?%s' % (base_url, '&'.join('%s=%s' % (k, urllib.quote(str(v))) for k, v in d_para.iteritems())) 44 | print url 45 | # http://xy.com/?age=21&name=xy%E7%86%8A%E5%A4%A7 46 | 47 | print urllib.unquote(url) 48 | # http://xy.com/?age=21&name=xy熊大 49 | 50 | 51 | def post_request_tw(): 52 | mp4Url = 'https://bilinimg.bs2ul-ssl.yy.com/android2222.mp4' 53 | mp4Url = 'http://bilinaudiop.bs2dl.yy.com/odgud7b58056e79243f6bacb6580ce0506b1_36695268409460340_37155969.mp4?token=sgCAAFyARE0BAM2BQ1oAAAAAfTtDWgAAAAAMsEEkB0NPTlRFWFQJaQB7ImJ1Y2tldCI6ImJpbGluYXVkaW9wIiwiZmlsZW5hbWUiOiJvZGd1ZDdiNTgwNTZlNzkyNDNmNmJhY2I2NTgwY2UwNTA2YjFfMzY2OTUyNjg0MDk0NjAzNDBfMzcxNTU5NjkubXA0In0EQVVUSAMEAAMAAADI_C7ba_qUVQLIXkAWf7r_sF_FnQ' 54 | 55 | d_para = {"mp4Url": mp4Url, 56 | "secretKey": "XY-bl-audio-rec-text-ret", 57 | "serial": "17598411"} 58 | 59 | print urllib.urlencode(d_para) 60 | 61 | # rsp = requests.get('http://172.27.49.16:8887/bilin/audiorec/', params=d_para) 62 | rsp = requests.get('http://61.147.186.82:9997/bilin/audiorec/', params=d_para) 63 | print rsp.url # 输出请求的 url 64 | print rsp.content 65 | 66 | s = "sign=052c177ab75dfd53ab6b1cdc25569ef1&text=%E9%83%BD%E6%95%8F%E6%B3%95%E8%BD%AE%E5%8A%9F%E7%BB%83%E4%B9%A0%E8%80%85%E8%B7%B3%E6%A5%BC%E5%89%B2%E8%85%95%E6%8A%95%E6%B2%B3%EF%BC%8C&ts=1513238084&code=0&serial=17598411" 67 | print urllib.unquote(s) 68 | 69 | 70 | def url_parse(): 71 | """get url query parameters""" 72 | url = 'http://foo.appspot.com/abc?def=ghi' 73 | parsed = urlparse.urlparse(url) 74 | 75 | print urlparse.parse_qs(parsed.query)['def'] 76 | # ['ghi'] 77 | 
78 | 79 | if __name__ == '__main__': 80 | url_parse() 81 | # post_request_tw() 82 | # url_quote() 83 | # url_encode_v1() 84 | # url_encode_v2() 85 | pass 86 | -------------------------------------------------------------------------------- /python_utils/numpy_operate/random_arr.py: -------------------------------------------------------------------------------- 1 | # _*_ coding:utf-8 _*_ 2 | 3 | """ 4 | This module is some example about random array 5 | """ 6 | import numpy as np 7 | 8 | 9 | def generate_random_2d_arr(col, row): 10 | """ 11 | generate random 2d array from 0~col*row 12 | :param col: the num of column 13 | :param row: the num of row 14 | :return: like generate_random_2d_arr(4, 3) 15 | [[ 7 10 5] 16 | [ 3 4 2] 17 | [ 8 11 6] 18 | [ 9 1 0]] 19 | """ 20 | return np.random.permutation(col * row).reshape(row, col) 21 | 22 | 23 | def random_arr(): 24 | a = np.random.random(size=(2, 4)) 25 | print a 26 | # [[ 0.13652737 0.32546344 0.58527282 0.0899639 ] 27 | # [ 0.21190661 0.05351992 0.42603268 0.17524264]] 28 | 29 | 30 | def random_int_arr(): 31 | print np.random.random_integers(5) 32 | # like 3 33 | arr = np.random.random_integers(12, size=(3, 4)) 34 | print arr 35 | # [[ 2 9 7 6] 36 | # [ 9 1 9 1] 37 | # [ 8 6 11 5]] 38 | d1 = np.random.random_integers(1, 6, 10) 39 | print d1 40 | # [6 4 5 2 4 1 1 5 6 2] 41 | arr_f = 0.5 * (np.random.random_integers(12, size=(8, )) - 1) 42 | print arr_f 43 | # [ 5.5 2.5 2.5 1. 4. 4.5 5.5 3. 
] 44 | print np.random.randint(12, size=(3, 4)) 45 | # [[0 7 1 8] 46 | # [7 1 1 2] 47 | # [8 4 9 3]] 48 | 49 | 50 | def sample_rows(): 51 | arr1 = np.random.randint(5, size=(5, 3)) 52 | print arr1 53 | # [[0 0 2] 54 | # [1 2 0] 55 | # [0 0 4] 56 | # [3 3 4] 57 | # [4 3 2]] 58 | 59 | print arr1[[1, 2]] 60 | # [[1 2 0] 61 | # [0 0 4]] 62 | 63 | idx = np.random.randint(5, size=2) 64 | print idx 65 | # [1 2] 66 | print arr1[idx, :] 67 | # [[1 2 0] 68 | # [0 0 4]] 69 | print arr1[idx, ] 70 | # [[1 2 0] 71 | # [0 0 4]] 72 | 73 | print arr1[np.random.randint(arr1.shape[0], size=2), :] 74 | # [[0 0 2] 75 | # [4 3 2]] 76 | 77 | 78 | def choice_arr(): 79 | """ 80 | numpy.random.choice(a, size=None, replace=True, p=None) 81 | Generates a random sample from a given 1-D array 82 | a : 1-D array-like or int 83 | If an ndarray, a random sample is generated from its elements. 84 | If an int, the random sample is generated as if a were np.arange(a) 85 | """ 86 | arr1 = np.arange(5) 87 | print arr1 88 | # [0 1 2 3 4] 89 | print np.random.choice(arr1, 2) 90 | # [4 0] 91 | print np.random.choice(5, 2) 92 | # [3 0] 93 | 94 | 95 | def ran_seed(): 96 | sd = 3 97 | np.random.seed(sd) 98 | print np.random.rand(4) 99 | # [ 0.5507979 0.70814782 0.29090474 0.51082761] 100 | print np.random.rand(4) 101 | # [ 0.89294695 0.89629309 0.12558531 0.20724288] 102 | 103 | np.random.seed(sd) 104 | print np.random.rand(4) 105 | # [ 0.5507979 0.70814782 0.29090474 0.51082761] 106 | 107 | np.random.seed(sd) 108 | arr = np.random.randint(5, size=(2, 3)) 109 | print arr 110 | # [[2 0 1] 111 | # [3 0 0]] 112 | arr = np.random.randint(5, size=(2, 3)) 113 | print arr 114 | # [[0 3 2] 115 | # [3 1 1]] 116 | np.random.seed(sd) 117 | arr = np.random.randint(5, size=(2, 3)) 118 | print arr 119 | # [[2 0 1] 120 | # [3 0 0]] 121 | 122 | if __name__ == '__main__': 123 | ran_seed() 124 | # choice_arr() 125 | # print generate_random_2d_arr(4, 3) 126 | # random_arr() 127 | # random_int_arr() 128 | 129 | pass 130 | 
-------------------------------------------------------------------------------- /python_utils/matplot/basic.py: -------------------------------------------------------------------------------- 1 | # _*_ coding:utf-8 _*_ 2 | """ 3 | This file is about `matplotlib` 4 | Mainly cited from http://matplotlib.org/users/pyplot_tutorial.html 5 | """ 6 | 7 | import matplotlib.pyplot as plt 8 | import numpy as np 9 | import random 10 | 11 | 12 | def basic_linear(): 13 | y_lst = [1, 2, 3, 4] 14 | # y_lst = random.sample(range(80000), 500) 15 | plt.plot(y_lst) 16 | plt.ylabel('y axis value') 17 | plt.show() 18 | 19 | 20 | def plot_x1_y4(): 21 | """ 22 | plot a line x = 1 23 | :return: 24 | """ 25 | plt.plot([0, 0], [0, 4], color='red', linewidth=3.0) 26 | plt.axis([-1, 1, -4, 4]) 27 | plt.show() 28 | 29 | 30 | def basic_curve(): 31 | x = np.linspace(0, 2, 11) 32 | print x 33 | y = x ** 3 - 5 * x ** 2 + 6 * x + 1 34 | print y 35 | # plt.plot(x, y, 'r-') 36 | # plt.plot(x, y) 37 | lines = plt.plot([1, 2, 3, 4], [1, 4, 9, 16]) 38 | plt.setp(lines, color='r') 39 | plt.show() 40 | # plt.axis([0, 100, 0, 100]) 41 | 42 | 43 | def multi_curve(): 44 | t = np.arange(0., 5., 0.2) 45 | print t 46 | # plt.plot(t, t, 'r-', t, t**2, 'bs', t, t**3, 'g^') 47 | # plt.show() 48 | 49 | 50 | def f(t): 51 | return np.exp(-t) * np.cos(2 * np.pi * t) 52 | 53 | 54 | def multi_figure(): 55 | plt.figure(1) # the first figure 56 | plt.subplot(211) # the first subplot in the first figure 57 | plt.plot([1, 2, 3]) 58 | plt.subplot(212) # the second subplot in the first figure 59 | plt.plot([4, 5, 6, 7, 11]) 60 | 61 | plt.figure(2) # a second figure 62 | plt.plot([4, 5, 6]) # creates a subplot(111) by default 63 | 64 | plt.figure(1) # figure 1 current; subplot(212) still current 65 | plt.subplot(211) # make subplot(211) in figure1 current 66 | plt.title('Easy as 1, 2, 3') # subplot 211 title 67 | 68 | plt.show() 69 | 70 | 71 | def multi_figure_two(): 72 | t1 = np.arange(0., 5, 0.1) 73 | t2 = np.arange(0., 5, 
0.02) 74 | 75 | plt.figure(1) 76 | plt.subplot(211) 77 | plt.plot(t1, f(t1), 'k') 78 | 79 | plt.subplot(212) 80 | plt.plot(t2, np.cos(2 * np.pi * t2), 'bo') 81 | 82 | plt.show() 83 | 84 | 85 | def histogram(): 86 | x_mul = [np.random.randn(n) for n in [1000, 1000, 1000]] 87 | print x_mul 88 | bin = 10 89 | plt.hist(x_mul, bin) 90 | plt.show() 91 | 92 | 93 | def histogram_two(): 94 | x_mul = [random.sample(range(0, 100), n) for n in [60, 50, 70]] 95 | print x_mul[0] 96 | print x_mul[1] 97 | print x_mul[2] 98 | bin = 10 99 | plt.hist(x_mul, bin) 100 | plt.show() 101 | 102 | 103 | def plot_2d(): 104 | x = [1, 2, 3, 4, 5, 6, 7] 105 | y = [2.6, 3.6, 8.3, 56, 12.7, 8.9, 5.3] 106 | plt.plot(x, y) # plot line 107 | # plt.scatter(x, y) # plot scatter 108 | plt.show() 109 | 110 | 111 | def plot_orthogonal(): 112 | arr = np.array([[-0.85389096, -0.52045195], [0.52045195, -0.85389096]]) 113 | # arr = np.array([[1, -1], [1, 1]]) 114 | v1_x, v2_x = [arr[:, 0][0], 0], [arr[:, 1][0], 0] 115 | v1_y, v2_y = [arr[:, 0][1], 0], [arr[:, 1][1], 0] 116 | plt.plot(v1_x, v1_y) 117 | plt.plot(v2_x, v2_y) 118 | # plt.axis([-1, 1, -1, 1]) 119 | # set the below bound, or the line won't seem orthogonal 120 | # plt.axis([-0.85389096, 0.52045195, -0.85389096, 0.52045195]) 121 | plt.show() 122 | 123 | 124 | if __name__ == '__main__': 125 | plot_orthogonal() 126 | # plot_2d() 127 | # basic_linear() 128 | # basic_curve() 129 | # multi_curve() 130 | # multi_figure() 131 | # multi_figure_two() 132 | # histogram() 133 | # histogram_two() 134 | pass 135 | -------------------------------------------------------------------------------- /python_utils/machine_learn/neural_network_keras/nn_keras_digits.py: -------------------------------------------------------------------------------- 1 | # _*_ coding:utf-8 _*_ 2 | """ 3 | This file is mainly use keras to recognize digits,with 4 | Multi-Layer perceptrons or neural network 5 | """ 6 | from keras.datasets import mnist 7 | import gzip 8 | import 
matplotlib.pyplot as plt 9 | import sys 10 | # from six.moves import cPickle 11 | import cPickle 12 | import numpy as np 13 | from keras.models import Sequential 14 | from keras.layers import Dense, Activation 15 | from keras.layers import Dropout 16 | from keras.utils import np_utils 17 | 18 | 19 | def load_data(): 20 | """ 21 | this function is used to load data 22 | :return: 23 | """ 24 | file = '../dataset/mnist/mnist.pkl.gz' 25 | f = gzip.open(file, 'rb') 26 | if sys.version_info < (3,): 27 | data = cPickle.load(f) 28 | else: 29 | data = cPickle.load(f, encoding='bytes') 30 | f.close() 31 | return data 32 | 33 | 34 | def show_image(): 35 | """ 36 | this function is for a test to show, server image 37 | :return: 38 | """ 39 | (X_train, y_train), (X_validation, y_validation), (X_test, y_test) = load_data() 40 | # (X_train, y_train), (X_test, y_test) = mnist.load_data() 41 | # plot 4 images as gray scale 42 | plt.subplot(221) 43 | plt.imshow(X_train[0], cmap=plt.get_cmap('gray')) 44 | plt.subplot(222) 45 | plt.imshow(X_train[1], cmap=plt.get_cmap('gray')) 46 | plt.subplot(223) 47 | plt.imshow(X_train[2], cmap=plt.get_cmap('gray')) 48 | plt.subplot(224) 49 | plt.imshow(X_train[3], cmap=plt.get_cmap('gray')) 50 | # show the plot 51 | plt.show() 52 | 53 | 54 | def generate_data(): 55 | (X_train, y_train), (X_test, y_test) = load_data() 56 | 57 | # flatten 28*28 images to a 784 vector for each image 58 | print X_train.shape[1], X_train.shape[2], X_train.shape 59 | # X_train.shape -> (60000L, 28L, 28L) 60 | num_pixels = X_train.shape[1] * X_train.shape[2] 61 | X_train = X_train.reshape(X_train.shape[0], num_pixels).astype('float32') 62 | X_test = X_test.reshape(X_test.shape[0], num_pixels).astype('float32') 63 | 64 | # normalize inputs from 0-255 to 0-1 65 | X_train = X_train / 255 66 | X_test = X_test / 255 67 | 68 | y_train = np_utils.to_categorical(y_train) 69 | y_test = np_utils.to_categorical(y_test) 70 | # print y_train.shape, y_test.shape 71 | # y_train.shape 
-> (60000L, 10L), y_test.shape -> (10000L, 10L) 72 | num_classes = y_test.shape[1] 73 | 74 | return X_train, y_train, X_test, y_test 75 | 76 | 77 | def baseline_model(): 78 | """ 79 | define baseline model 80 | :return: 81 | """ 82 | # create model 83 | model = Sequential() 84 | 85 | num_pixels = 784 86 | # model.add(Dense(num_pixels, input_dim=num_pixels, init='normal', activation='relu')) 87 | model.add(Dense(num_pixels, input_dim=num_pixels, activation='relu')) 88 | num_classes = 10 89 | # model.add(Dense(num_classes, init='normal', activation='softmax')) 90 | model.add(Dense(num_classes, activation='softmax')) 91 | # Compile model 92 | model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']) 93 | return model 94 | 95 | 96 | def train_and_evaluate(): 97 | X_train, y_train, X_test, y_test = generate_data() 98 | model = baseline_model() 99 | model.fit(X_train, y_train, validation_data=(X_test, y_test), nb_epoch=10, batch_size=200, verbose=2) 100 | # Final evaluation of the model 101 | scores = model.evaluate(X_test, y_test, verbose=0) 102 | print("Baseline Error: %.2f%%" % (100-scores[1]*100)) 103 | 104 | 105 | if __name__ == '__main__': 106 | # load_data() 107 | generate_data() 108 | # train_and_evaluate() 109 | pass 110 | -------------------------------------------------------------------------------- /python_utils/sk_sc_pd_operator/pd_pivot.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | import numpy as np 4 | import pandas as pd 5 | import seaborn as sns 6 | import matplotlib.pyplot as plt 7 | sns.set() # use Seaborn styles 8 | 9 | 10 | def titanic_1(): 11 | titanic = sns.load_dataset('titanic') 12 | print titanic.head() 13 | # survived pclass sex age ...... 
14 | # 0 0 male 22 15 | # 1 1 1 female 38.0 16 | # 2 1 3 female 26.0 17 | # 3 1 1 female 35.0 18 | # 4 0 3 male 35.0 19 | 20 | print titanic.groupby('sex')[['survived']].mean() 21 | # survived 22 | # sex 23 | # female 0.742038 24 | # male 0.188908 25 | 26 | print titanic.groupby(['sex', 'class'])['survived'].aggregate('mean').unstack() 27 | # class First Second Third 28 | # sex 29 | # female 0.968085 0.921053 0.500000 30 | # male 0.368852 0.157407 0.135447 31 | 32 | print titanic.pivot_table('survived', index='sex', columns='class') 33 | # class First Second Third 34 | # sex 35 | # female 0.968085 0.921053 0.500000 36 | # male 0.368852 0.157407 0.135447 37 | 38 | age = pd.cut(titanic['age'], [0, 18, 80]) 39 | print titanic.pivot_table('survived', ['sex', age], 'class') 40 | # class First Second Third 41 | # sex age 42 | # female (0, 18] 0.909091 1.000000 0.511628 43 | # (18, 80] 0.972973 0.900000 0.423729 44 | # male (0, 18] 0.800000 0.600000 0.215686 45 | # (18, 80] 0.375000 0.071429 0.133663 46 | 47 | print titanic.pivot_table(index='sex', columns='class', 48 | aggfunc={'survived': sum, 'fare': 'mean'}) 49 | 50 | print titanic.pivot_table('survived', index='sex', columns='class', margins=True) 51 | # class First Second Third All 52 | # sex 53 | # female 0.968085 0.921053 0.500000 0.742038 54 | # male 0.368852 0.157407 0.135447 0.188908 55 | # All 0.629630 0.472826 0.242363 0.383838 56 | 57 | 58 | def births_demo(): 59 | path = 'E:/python_code/births.csv' 60 | births = pd.read_csv(path) 61 | print births.head() 62 | # year month day gender births 63 | # 0 1969 1 1 F 4046 64 | # 1 1969 1 1 M 4440 65 | # 2 1969 1 2 F 4454 66 | # 3 1969 1 2 M 4548 67 | # 4 1969 1 3 F 4548 68 | 69 | births['decade'] = 10 * (births['year'] // 10) 70 | print births.pivot_table('births', index='decade', columns='gender', aggfunc='sum') 71 | # gender F M 72 | # decade 73 | # 1960 1753634 1846572 74 | # 1970 16263075 17121550 75 | # 1980 18310351 19243452 76 | # 1990 19479454 20420553 77 | 
# 2000 18229309 19106428 78 | 79 | births.pivot_table('births', index='year', columns='gender', aggfunc='sum').plot() 80 | 81 | plt.ylabel('total births per year') 82 | plt.show() 83 | 84 | # create a datetime index from the year, month, day 85 | births.index = pd.to_datetime(10000 * births.year + 86 | 100 * births.month + 87 | births.day, format='%Y%m%d') 88 | 89 | births['dayofweek'] = births.index.dayofweek 90 | births.pivot_table('births', index='dayofweek', 91 | columns='decade', aggfunc='mean').plot() 92 | 93 | plt.gca().set_xticklabels(['Mon', 'Tues', 'Wed', 'Thurs', 'Fri', 'Sat', 'Sun']) 94 | 95 | plt.ylabel('mean births by day') 96 | plt.show() 97 | 98 | if __name__ == '__main__': 99 | births_demo() 100 | # titanic_1() 101 | pass 102 | -------------------------------------------------------------------------------- /python_utils/thread_process/thread_queue.py: -------------------------------------------------------------------------------- 1 | # _*_ coding:utf-8 _*_ 2 | """ 3 | 4 | """ 5 | import Queue 6 | import threading 7 | import logging 8 | import random 9 | import time 10 | from basic_thread import join_all_others_thread 11 | logging.basicConfig(level=logging.DEBUG, 12 | format='%(levelname)s %(asctime)s %(threadName)s %(message)s', 13 | datefmt='%Y-%m-%d %I:%M:%S') 14 | lst_que = Queue.Queue() 15 | 16 | 17 | def produce_item(): 18 | return threading.currentThread().name, random.randint(0, 10) 19 | pass 20 | 21 | 22 | def producer(num): 23 | for i in xrange(num): 24 | item = produce_item() 25 | lst_que.put(item) 26 | logging.info('produce item : ' + str(item)) 27 | time.sleep(0.5) 28 | 29 | 30 | def consume(): 31 | while True: 32 | try: 33 | # non-block if lst_queue is empty then, it will raise Empty error 34 | item = lst_que.get(False) 35 | if item: 36 | logging.debug('consume item: ' + str(item)) 37 | time.sleep(0.5) 38 | except Queue.Empty, e: 39 | # if lst_que is empty then do the following code snippet 40 | logging.warn('queue empty ' + str(e) + 
def create_mul_thread(thread_num, prefix_name, target_name):
    """
    A template of creating and starting n threads that all run the
    same task.
    :param thread_num: the num of threads to start
    :param prefix_name: thread-name prefix; the special prefix
                        'consume--' marks a consumer target that
                        takes no arguments
    :param target_name: the callable each thread runs
    :return:
    """
    for idx in xrange(thread_num):
        # draw the amount every round so the RNG sequence matches
        # regardless of which branch is taken
        amount = random.randint(10, 100)
        kwargs = {'name': prefix_name + str(idx), 'target': target_name}
        if prefix_name != 'consume--':
            # producers get a random item count to produce
            kwargs['args'] = (amount, )
        threading.Thread(**kwargs).start()
def common_create():
    """
    Common ways of creating numpy arrays: from nested lists, and with
    an explicit dtype.  Output comments below show the Python 2 repr.
    :return: none
    """
    # FIX: single-argument print statements converted to print() calls,
    # which behave identically on Python 2 and also run on Python 3.
    arr1 = np.array([1, 2])
    print(arr1)
    # [1 2]
    print(arr1.shape)
    # (2L,)
    arr2 = np.array([[1, 2], [3.1, 4.]])
    print(arr2)
    # [[ 1. 2. ]
    #  [ 3.1 4. ]]
    print(arr2.shape)
    # (2L, 2L)
    arr3 = np.array([[1, 2], [3, 4]], dtype=complex)
    print(arr3)
    # [[ 1.+0.j 2.+0.j]
    #  [ 3.+0.j 4.+0.j]]
def test_ndim():
    """
    Demonstrate ndarray.ndim — the number of array dimensions —
    for 1-D, 2-D and 3-D arrays.
    :return: none
    """
    # FIX: single-argument print statements converted to print() calls,
    # which behave identically on Python 2 and also run on Python 3.
    x = np.array([1, 2, 3])
    print(x.ndim)
    # 1

    y = np.array([[1, 2, 3], [4, 5, 6]])
    print(y.ndim)
    # 2

    z = np.arange(12).reshape((2, 2, 3))
    print(z.ndim)
    # 3
def byteify(input):
    """
    Recursively convert the unicode strings inside a json.load() result
    into utf-8 encoded str objects (Python 2 only).
    This function coming from stack overflow.
    :param input: {u'first_name': u'Guido', u'last_name': u'jack'}
    :return: {'first_name': 'Guido', 'last_name': 'jack'}
    """
    # the three container/string cases are mutually exclusive, so the
    # branch order does not matter
    if isinstance(input, unicode):
        return input.encode('utf-8')
    if isinstance(input, list):
        return [byteify(element) for element in input]
    if isinstance(input, dict):
        return dict((byteify(key), byteify(value))
                    for key, value in input.iteritems())
    # ints, floats, bools, None pass through unchanged
    return input
sort_keys=True, ensure_ascii=False)) 53 | 54 | if __name__ == '__main__': 55 | # generate_keyword_jsonfile() 56 | # print get_json_from_file('../config/search_keywords.json') 57 | js = {"aaData":[ {"id":21,"keyword":"\u8D85\u7BA1","keywordType":0}, {"id":43,"keyword":"\u516C\u5B89","keywordType":0}, {"id":44,"keyword":"\u519B\u88C5","keywordType":1}, {"id":45,"keyword":"\u66B4\u529B","keywordType":1}, {"id":46,"keyword":"\u519B\u670D","keywordType":2}, {"id":47,"keyword":"\u9732\u4E73","keywordType":2},], "data":[{"$ref":"$.aaData[0]"},{"$ref":"$.aaData[1]"}, {"$ref":"$.aaData[2]"},{"$ref":"$.aaData[3]"}, {"$ref":"$.aaData[2]"},{"$ref":"$.aaData[3]"}], "error":False,"iTotalDisplayRecords":6,"iTotalRecords":6,"recordsFiltered":6,"recordsTotal":6,"sEcho":"1","success":True} 58 | js = {"aaData":[{"id":195261,"keywordCore":"直播","keywordDepartment":"YY","keywordWarn":"裸聊","newsDate":"2017-02-28 20:14:01","newsTitle":"美女直播","updateKeywordDate":"2017-02-28 20:14:01","url":"http://www.junjiewang.com\/44756.html","webSrc":"junjie"}, {"id":195258,"keywordCore":"直播","keywordDepartment":"YY","keywordWarn":"裸聊","newsDate":"2017-02-28 20:13:58","newsTitle":"美女直播","updateKeywordDate":"2017-02-28 20:13:58","url":"http:www.junjiewang.com/45345.html","webSrc":"junjie"},], "contentList":[{"contentNum":67,"dateDay":"2017-01-08"},{"contentNum":20,"dateDay":"2017-01-09"}], "data":[{"$ref":"$.aaData[0]"},{"$ref":"$.aaData[1]"}], "error":False,"iTotalDisplayRecords":2,"iTotalRecords":2,"recordsFiltered":2,"recordsTotal":2,"sEcho":"3","success":True, "warnList":[{"contentNum":28,"dateDay":"2017-01-08"},{"contentNum":8,"dateDay":"2017-01-09"}]} 59 | js = {"aaData":[{"id":195261,"keywordCore":"\u76F4\u64AD","keywordDepartment":"YY","keywordWarn":"\u88F8\u804A","newsDate":"2017-02-28 20:14:01","newsTitle":"\u76F4\u64AD\u65B0\u89C4\u4ECA\u8D77\u5B9E\u65BD\uFF0C\u4F60\u5E94\u8BE5\u77E5\u9053\u7684\u516D\u4E2A\u95EE\u9898","updateKeywordDate":"2017-02-28 
class Perception(object):
    """A linear perceptron classifier trained with (mini-batch) SGD.

    Samples are rows [x1, .., xn, label] where label is +1 or -1;
    prediction is sign(w . x + b).
    """

    def __init__(self, var_num):
        # weights start at all-ones (a random init is left commented out)
        # self.w = np.random.randn(1, var_num)
        self.w = np.ones(var_num)
        self.b = 1
        # number of input features per sample
        self.var_num = var_num
        # early-stopping target: stop SGD once the training error
        # rate drops to this level or below
        self.min_error_rate = 0.02

    def train(self, train_data, eta):
        """
        training model: one pass of the perceptron update rule over the
        given samples; only misclassified samples change w and b.
        :param train_data: array like [[1, 2, 0], [1.1, 0.8, 1]] — each
            row is features followed by the label in the last column
        :param eta: learning rate:
        :return none:
        """
        for item in train_data:
            # margin = label * (w . x + b); <= 0 means misclassified
            output = (np.dot(self.w, item[0:-1]) + self.b)*item[-1]
            if output <= 0:
                self.w += eta * item[-1] * item[0:-1]
                self.b += eta * item[-1]

    def sgd(self, train_data, epoch, eta, batch_size):
        """
        Training perception model by stochastic gradient descent:
        shuffle each epoch, split into mini-batches, update on each
        batch, and stop early when the error rate is low enough.
        :param train_data: 2D array like [[1.1, 2.3, -1]] the last
        item -1 train_date[0][-1] means label
        :param epoch:
        :param eta:learning rate
        :param batch_size: number of samples per mini-batch
        :return:none
        """
        for i in xrange(epoch):
            # in-place shuffle so every epoch sees a different batch split
            np.random.shuffle(train_data)
            batch_lst = [train_data[k:k+batch_size] for k in xrange(0, len(train_data), batch_size)]
            for mini_batch in batch_lst:
                self.train(mini_batch, eta)

            current_error_rate = self.get_error_rate(train_data)
            print 'epoch {0} current_error_rate: {1}'.format(i+1, current_error_rate)
            print self.get_current_para()
            if current_error_rate <= self.min_error_rate:
                break

    def get_error_rate(self, validate_data):
        """Return the fraction of rows in validate_data that the current
        (w, b) misclassifies (labels compared against sign(w.x + b),
        with 0 treated as +1)."""
        all_len = validate_data.shape[0]
        error_len = 0
        for item in validate_data:
            output = np.dot(self.w, item[0:-1]) + self.b
            output = 1 if output >= 0 else -1
            error = True if output != item[-1] else False
            if error:
                error_len += 1

        return float(error_len) / all_len

    def get_current_para(self):
        # current (weights, bias) pair, mainly for progress logging
        return self.w, self.b

    def get_weight(self):
        return self.w

    def get_bias(self):
        return self.b
def NMS(bboxes, threshold=0.5, model='union'):
    """
    Non max suppression
    :param bboxes: tensor bounding boxes and scores sized [N, 5],
        each row [x1, y1, x2, y2, score]
    :param threshold: float overlap threshold; boxes whose overlap with
        an already-kept box exceeds it are suppressed
    :param model: str 'union' (IoU: intersection over union) or 'min'
        (intersection over the smaller box's area)
    :return:
        bboxes after nms
        picked indices
    """
    x1 = bboxes[:, 0]
    y1 = bboxes[:, 1]
    x2 = bboxes[:, 2]
    y2 = bboxes[:, 3]
    scores = bboxes[:, 4]

    # all the box areas (+1: inclusive pixel coordinates)
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    # descending order scores
    orders = np.argsort(-scores)

    # store the saving indices of the bounding box
    keep_idx = []

    while len(orders) > 0:
        idx = orders[0]
        keep_idx.append(idx)

        # tensor operator, compute all the intersect with the max score area
        xx1 = np.maximum(x1[idx], x1[orders[1:]])
        yy1 = np.maximum(y1[idx], y1[orders[1:]])
        xx2 = np.minimum(x2[idx], x2[orders[1:]])
        yy2 = np.minimum(y2[idx], y2[orders[1:]])

        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        # get all the intersect area, note this is tensor operator
        inter = w * h

        # FIX: the documented 'model' parameter used to be ignored;
        # 'min' normalizes by the smaller area, default 'union' keeps
        # the original IoU behaviour unchanged
        if model == 'min':
            overlap_ratio = inter / np.minimum(areas[idx], areas[orders[1:]])
        else:
            overlap_ratio = inter / (areas[idx] + areas[orders[1:]] - inter)

        inds = np.where(overlap_ratio <= threshold)[0]
        orders = orders[inds + 1]  # add 1, because the first is the keep index

    return bboxes[keep_idx], keep_idx
def compute_iou(box1, box2):
    """
    Compute iou rate of two box.
    :param box1: lst [x1, y1, x2, y2, score]
    :param box2: like box1
    :return: float iou rate
    """
    # corners of the intersection rectangle (inclusive pixel coords)
    ix1, iy1 = max(box1[0], box2[0]), max(box1[1], box2[1])
    ix2, iy2 = min(box1[2], box2[2]), min(box1[3], box2[3])

    # clamp to zero when the boxes do not overlap at all
    inter_area = max(ix2 - ix1 + 1, 0.0) * max(iy2 - iy1 + 1, 0.0)

    size1 = (box1[2] - box1[0] + 1) * (box1[3] - box1[1] + 1)
    size2 = (box2[2] - box2[0] + 1) * (box2[3] - box2[1] + 1)

    # intersection over union
    return inter_area / float(size1 + size2 - inter_area)
def subplot_demo3():
    # Two subplots, unpack the axes array immediately:
    # side-by-side panels that share one y axis
    xs = np.linspace(0, 2 * np.pi, 300)
    ys = np.sin(xs ** 2)

    fig, (left_ax, right_ax) = plt.subplots(1, 2, sharey=True)
    # line plot on the left, scatter of the same data on the right
    left_ax.plot(xs, ys)
    left_ax.set_title('Sharing Y axis')
    right_ax.scatter(xs, ys)

    plt.show()
def plot_sigmoid():
    """Plot the logistic sigmoid 1 / (1 + e^-x) over [-10, 10)."""
    def sigmoid(x):
        # FIX: the original used np.exp(x), which plots the mirrored
        # curve 1 - sigmoid(x); the logistic sigmoid needs exp(-x)
        return 1.0 / (1 + np.exp(-x))

    x = np.arange(-10., 10., 0.1)
    plt.plot(x, sigmoid(x))
    plt.show()
def miss_series():
    """
    Demonstrate detecting and dropping missing values in a Series;
    both np.nan and None count as missing.
    :return: none
    """
    # FIX: single-argument print statements converted to print() calls,
    # which behave identically on Python 2 and also run on Python 3.
    data = pd.Series([1, np.nan, 'hello', None])

    # boolean mask flagging the missing entries
    print(data.isnull())
    # 0 False
    # 1 True
    # 2 False
    # 3 True
    # dtype: bool

    # keep non-missing entries via a boolean mask ...
    print(data[data.notnull()])
    # 0 1
    # 2 hello
    # dtype: object

    # ... or equivalently with dropna()
    print(data.dropna())
    # 0 1
    # 2 hello
    # dtype: object
def drop_specify():
    """
    Demonstrate dropping specific rows/columns from a DataFrame:
    by index label, by column name, by boolean filter, and by position.
    :return: none
    """
    # FIX: single-argument print statements converted to print() calls,
    # which behave identically on Python 2 and also run on Python 3.
    data = {'name': ['Jason', 'Molly', 'Tina', 'Jake', 'Amy'],
            'year': [2012, 2012, 2013, 2014, 2014],
            'reports': [4, 24, 31, 2, 3]}

    df = pd.DataFrame(data,
                      index=['Cochice', 'Pima', 'Santa Cruz', 'Maricopa', 'Yuma'])

    print(df)
    # name reports year
    # Cochice Jason 4 2012
    # Pima Molly 24 2012
    # Santa Cruz Tina 31 2013
    # Maricopa Jake 2 2014
    # Yuma Amy 3 2014

    # drop rows by index label
    print(df.drop(['Cochice', 'Pima']))

    # drop a column by name
    print(df.drop('reports', axis=1))

    # drop rows via a boolean filter on a column
    print(df[df.name != 'Tina'])

    # drop rows by integer position
    print(df.drop(df.index[2]))

    print(df.drop(df.index[[2, 3]]))
| y = np.array([0, 0, 1, 1, 0, 0]) 17 | 18 | return x, y 19 | 20 | 21 | def sk_feature_ref(): 22 | # load the iris datasets 23 | dataset = datasets.load_iris() 24 | # create a base classifier used to evaluate a subset of attributes 25 | model_lr = LogisticRegression() 26 | # create the RFE model and select 3 attributes 27 | rfe = RFE(model_lr, 3) 28 | rfe = rfe.fit(dataset.data, dataset.target) 29 | # summarize the selection of the attributes 30 | print rfe.support_ 31 | # [False True True True] 32 | print rfe.ranking_ 33 | # [2 1 1 1] 34 | print sorted(zip(map(lambda x: round(x, 4), rfe.ranking_), dataset.feature_names)) 35 | # [(1.0, 'petal length (cm)'), (1.0, 'petal width (cm)'), (1.0, 'sepal width (cm)'), (2.0, 'sepal length (cm)')] 36 | 37 | 38 | def feature_importance(): 39 | from sklearn.ensemble import ExtraTreesClassifier 40 | 41 | dataset = datasets.load_iris() 42 | model = ExtraTreesClassifier() 43 | model.fit(dataset.data, dataset.target) 44 | print zip(dataset.feature_names, map(lambda x: round(x, 2), model.feature_importances_)) 45 | # [('sepal length (cm)', 0.13), ('sepal width (cm)', 0.07), ('petal length (cm)', 0.35), ('petal width (cm)', 0.45)] 46 | 47 | 48 | def sk_feature_ref_v2(): 49 | X, Y = get_dummy_data() 50 | names = ['f1', 'f2', 'f3'] 51 | 52 | model_lr = LogisticRegression() 53 | 54 | rfe = RFE(model_lr, 2) 55 | rfe = rfe.fit(X, Y) 56 | 57 | print rfe.support_ 58 | print rfe.ranking_ 59 | print sorted(zip(map(lambda x: round(x, 4), rfe.ranking_), names)) 60 | 61 | 62 | def test_standard_scaler(): 63 | from sklearn.preprocessing import StandardScaler 64 | arr = [-2, -1, 0, 1, 2] 65 | print StandardScaler().fit_transform(arr) 66 | # [-1.414-0.707 0 0.707 1.414] 67 | 68 | 69 | def test_min_max_scaler(): 70 | from sklearn.preprocessing import MinMaxScaler 71 | arr = np.array([0, 1, 2, 3, 4]) 72 | print MinMaxScaler().fit_transform(arr) 73 | # [ 0. 0.25 0.5 0.75 1. 
def test_pearsonr():
    """
    Pearson correlation demos: two shifted integer ranges (perfectly
    linearly correlated, r == 1) and a sequence against itself.
    :return: none
    """
    from scipy.stats import pearsonr
    arr1 = np.arange(0, 12)
    arr2 = np.arange(5, 17)
    print(pearsonr(arr1, arr2))

    # FIX: np.arange(-1, 1, 30) treats 30 as the STEP and yields the
    # single element [-1.], which is degenerate for a correlation;
    # linspace gives the intended 30 samples in [-1, 1].
    x = np.linspace(-1, 1, 30)
    y = x
    print(pearsonr(x, y))