├── python_utils
├── matplot
│ ├── __init__.py
│ ├── plot_accuracy_loss.py
│ ├── basic.py
│ └── plot_many.py
├── utils
│ ├── __init__.py
│ └── JsonUtil.py
├── DbService
│ ├── __init__.py
│ ├── mysql_db
│ │ ├── __init__.py
│ │ ├── DbBase.py
│ │ └── DbSubService.py
│ ├── redis_db
│ │ └── __init__.py
│ ├── sqlalchemy
│ │ ├── __init__.py
│ │ ├── orm.py
│ │ └── basic.py
│ └── config
│ │ └── mysql_config.json
├── al_lt_common
│ ├── __init__.py
│ ├── al_str.py
│ └── al_cv.py
├── distributed
│ ├── __init__.py
│ └── zookeeper_demo
│ │ ├── __init__.py
│ │ ├── zk_failover_monitor.py
│ │ ├── zk_watch.py
│ │ ├── zk_failover_worker.py
│ │ ├── zk_lock_demo.py
│ │ ├── zk_master_select.py
│ │ └── zk_node_ope.py
├── http_basic
│ ├── __init__.py
│ ├── flask_web
│ │ ├── __init__.py
│ │ ├── flask_restful.py
│ │ ├── flask_resp.py
│ │ ├── flask_file_svr.py
│ │ ├── flask_auth.py
│ │ ├── flask_error_handler.py
│ │ ├── flask_basic_web.py
│ │ └── flask_content_type.py
│ ├── http_realize
│ │ ├── __init__.py
│ │ ├── http_realize_1
│ │ │ ├── __init__.py
│ │ │ └── http_server_1.py
│ │ ├── static_server
│ │ │ ├── __init__.py
│ │ │ ├── plain.html
│ │ │ └── static_server.py
│ │ ├── test_SimpleHTTPServer.py
│ │ ├── http_svr_simple.py
│ │ └── http_svr_basic_1.py
│ ├── simple_rpc
│ │ ├── __init__.py
│ │ ├── rpc_client_1.py
│ │ └── rpc_server_1.py
│ ├── socket_basic
│ │ ├── __init__.py
│ │ ├── udp_sock
│ │ │ ├── __init__.py
│ │ │ ├── udp_client.py
│ │ │ └── udp_server.py
│ │ ├── SocketServer_basic.py
│ │ └── basic_client.py
│ ├── wsgi_demo
│ │ ├── __init__.py
│ │ └── wsgi_demo.py
│ ├── http_client_get.py
│ └── url_ope.py
├── machine_learn
│ ├── __init__.py
│ ├── PCA
│ │ ├── __init__.py
│ │ └── pca_basic.py
│ ├── Bayes
│ │ ├── __init__.py
│ │ └── bayes_sklearn.py
│ ├── cluster
│ │ ├── __init__.py
│ │ └── sk_cluster.py
│ ├── knearest
│ │ ├── __init__.py
│ │ ├── knn_scratch.py
│ │ └── knn_classify_sklearn.py
│ ├── decision_tree
│ │ ├── __init__.py
│ │ ├── dt.png
│ │ ├── tree.dot
│ │ ├── create_data.py
│ │ ├── dtree_scratch.py
│ │ └── dtree_sklearn.py
│ ├── perception
│ │ ├── __init__.py
│ │ └── perception.py
│ ├── linear_regression
│ │ ├── __init__.py
│ │ └── sk_example.py
│ ├── logistic_regression
│ │ ├── __init__.py
│ │ ├── lr_sklearn_v1.py
│ │ └── lr_scratch.py
│ ├── neural_network_keras
│ │ ├── __init__.py
│ │ ├── lstm_nlp.py
│ │ ├── cnn_keras_digits.py
│ │ └── nn_keras_digits.py
│ └── dataset
│ │ ├── decision_tree
│ │ └── data_banknote_authentication.txt
│ │ ├── cluster
│ │ └── cluster_txt
│ │ ├── perception
│ │ └── dataset.txt
│ │ └── logistic_regression
│ │ └── lr_ml_action.txt
├── netsocket
│ ├── __init__.py
│ ├── basic_socket.py
│ └── ip_int.py
├── numpy_operate
│ ├── __init__.py
│ ├── structured_arr.py
│ ├── arr_vectorize.py
│ ├── flip_arr.py
│ ├── zero_one_empty.py
│ ├── broadcast_demo.py
│ ├── array_multiply.py
│ ├── np_distance.py
│ ├── arr_equal_close.py
│ ├── arr_sort.py
│ ├── algebra_op.py
│ ├── log2_op.py
│ ├── idx_arrays.py
│ ├── random_arr.py
│ └── array_create.py
├── opencv_basic
│ ├── __init__.py
│ ├── path_var.py
│ ├── cv_basic_op.py
│ └── url_img_cv.py
├── thread_process
│ ├── __init__.py
│ ├── basic_process.py
│ ├── sema_thread.py
│ ├── basic_thread.py
│ ├── thread_timer.py
│ ├── multitread_profile.py
│ ├── thread_condition.py
│ ├── pool_dummy.py
│ ├── pool_queue.py
│ ├── thread_lock.py
│ └── thread_queue.py
├── document
│ ├── machine_learn
│ │ ├── knearest
│ │ │ └── README.md
│ │ └── percepton
│ │ │ ├── perception_plot.jpg
│ │ │ ├── perception_ret.jpg
│ │ │ └── README.md
│ └── numpy_operate
│ │ └── README.md
├── py_basic
│ ├── __init__.py
│ ├── arg_parse.py
│ ├── MD5_sha.py
│ ├── log_config.py
│ ├── global_val.py
│ ├── profile_ope.py
│ ├── with_usage.py
│ ├── collection_ope.py
│ ├── except_ope.py
│ ├── argparse_ope.py
│ ├── tuple_operate.py
│ ├── num_ope.py
│ ├── argv_basic.py
│ ├── decorator_basic.py
│ ├── yield_ope.py
│ ├── random_operator.py
│ ├── decorator_set.py
│ ├── dw_img_from_google.py
│ ├── calendar_ope.py
│ ├── operator_ope.py
│ ├── obj_is.py
│ ├── base64_test.py
│ ├── functional_program.py
│ ├── kwargs_xargs.py
│ ├── str_basic.py
│ ├── set_ope.py
│ └── time_ope.py
└── sk_sc_pd_operator
│ ├── __init__.py
│ ├── sc_distance_ope.py
│ ├── pd_str.py
│ ├── sk_KFlod.py
│ ├── pd_plot.py
│ ├── sk_metric_accuracy.py
│ ├── pd_feature2value.py
│ ├── split_train_test_data.py
│ ├── pd_index.py
│ ├── pd_visualize_diamond.py
│ ├── pd_concat_join.py
│ ├── pd_GridSearchCV.py
│ ├── pd_ope.py
│ ├── pd_dummy_val.py
│ ├── pd_date_time.py
│ ├── pd_pivot.py
│ ├── pd_miss_data.py
│ └── sk_feature_process.py
└── README.md
/python_utils/matplot/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/python_utils/utils/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/python_utils/DbService/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/python_utils/al_lt_common/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/python_utils/distributed/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/python_utils/http_basic/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/python_utils/machine_learn/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/python_utils/netsocket/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/python_utils/numpy_operate/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/python_utils/opencv_basic/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/python_utils/DbService/mysql_db/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/python_utils/DbService/redis_db/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/python_utils/machine_learn/PCA/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/python_utils/opencv_basic/path_var.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/python_utils/thread_process/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/python_utils/DbService/sqlalchemy/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/python_utils/http_basic/flask_web/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/python_utils/http_basic/http_realize/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/python_utils/http_basic/simple_rpc/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/python_utils/http_basic/socket_basic/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/python_utils/http_basic/wsgi_demo/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/python_utils/machine_learn/Bayes/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/python_utils/machine_learn/cluster/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/python_utils/machine_learn/knearest/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/python_utils/distributed/zookeeper_demo/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/python_utils/machine_learn/decision_tree/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/python_utils/machine_learn/perception/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/python_utils/document/machine_learn/knearest/README.md:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/python_utils/http_basic/socket_basic/udp_sock/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/python_utils/machine_learn/linear_regression/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/python_utils/machine_learn/logistic_regression/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/python_utils/http_basic/http_realize/http_realize_1/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/python_utils/http_basic/http_realize/static_server/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/python_utils/machine_learn/neural_network_keras/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/python_utils/machine_learn/dataset/decision_tree/data_banknote_authentication.txt:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/python_utils/py_basic/__init__.py:
--------------------------------------------------------------------------------
# coding:utf-8

# Package initializer for py_basic; intentionally contains no runtime logic.
if __name__ == '__main__':

    pass

--------------------------------------------------------------------------------
/python_utils/sk_sc_pd_operator/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | This package is common usage about sklearn, scipy, pandas library
3 | """
--------------------------------------------------------------------------------
/python_utils/DbService/config/mysql_config.json:
--------------------------------------------------------------------------------
1 | {
2 | "host":"localhost","user":"root","pwd":"123456",
3 | "db":"springdemo","port":3306
4 | }
--------------------------------------------------------------------------------
/python_utils/opencv_basic/cv_basic_op.py:
--------------------------------------------------------------------------------
# _*_coding:utf-8 _*_

"""
Basic operations for cv2 (OpenCV). Currently a placeholder module.
"""



--------------------------------------------------------------------------------
/python_utils/machine_learn/decision_tree/dt.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jayhello/python_utils/HEAD/python_utils/machine_learn/decision_tree/dt.png
--------------------------------------------------------------------------------
/python_utils/document/machine_learn/percepton/perception_plot.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jayhello/python_utils/HEAD/python_utils/document/machine_learn/percepton/perception_plot.jpg
--------------------------------------------------------------------------------
/python_utils/document/machine_learn/percepton/perception_ret.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jayhello/python_utils/HEAD/python_utils/document/machine_learn/percepton/perception_ret.jpg
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # python_utils
2 | 1.This project is my common-use record of Python. If we do not use some utils for a long time, we may forget them,
3 | so I uploaded them to GitHub. When I need a util, I can get it directly from this project, avoiding another search on
4 | Google.
5 |
--------------------------------------------------------------------------------
/python_utils/http_basic/http_realize/static_server/plain.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Plain Page
6 |
7 |
8 | Plain Page
9 | Nothin' but HTML.
10 |
11 |
--------------------------------------------------------------------------------
/python_utils/sk_sc_pd_operator/sc_distance_ope.py:
--------------------------------------------------------------------------------
# _*_ coding:utf-8 _*_
"""Distance helpers built on scipy.spatial.distance."""

from scipy.spatial import distance
import numpy as np


def eu_distance():
    """Compute, print and return the Euclidean distance of two demo vectors.

    Fixes the Python-2-only ``print`` statement and returns the value so
    callers can verify it instead of only reading stdout.

    Returns:
        float: distance between [1, 2, 3] and [3, 4, 5] (sqrt(12) ~ 3.4641).
    """
    a1 = np.array([1, 2, 3])
    a2 = np.array([3, 4, 5])
    dist = distance.euclidean(a1, a2)
    print(dist)
    # 3.46410161514
    return dist


if __name__ == '__main__':
    eu_distance()
    pass
--------------------------------------------------------------------------------
/python_utils/document/numpy_operate/README.md:
--------------------------------------------------------------------------------
1 | sometimes you want numbered lists
2 |
3 | 1. one
4 | 2. two
5 |
6 | sometimes you want bullet points
7 |
8 | * start a line with a star
9 | * profit!
10 |
11 | Alternatively,
12 |
13 | - Dashes work just as well
14 | - And if you have sub points, put two spaces before the dash or star:
15 | - Like this
16 | - And this
17 | * hello world!
18 |
--------------------------------------------------------------------------------
/python_utils/document/machine_learn/percepton/README.md:
--------------------------------------------------------------------------------
1 | ## The running result images
2 | 1. 
3 | 2. 
4 |
5 | ## Example of implementing a perceptron
6 |
--------------------------------------------------------------------------------
/python_utils/thread_process/basic_process.py:
--------------------------------------------------------------------------------
# _*_ coding:utf-8 _*_

import os
import multiprocessing


def get_process_id():
    """Print and return the current process ids and process name.

    Fixes the Python-2-only ``print`` statements and returns the values so
    callers can check them. In the main process both pids are identical and
    the name is 'MainProcess'.

    Returns:
        tuple: (os_pid, multiprocessing_pid, process_name).
    """
    os_pid = os.getpid()
    mp_pid = multiprocessing.current_process().pid
    proc_name = multiprocessing.current_process().name
    print(os_pid)       # e.g. 8844
    print(mp_pid)       # same value as os.getpid()
    print(proc_name)    # MainProcess
    return os_pid, mp_pid, proc_name


if __name__ == '__main__':
    get_process_id()
--------------------------------------------------------------------------------
/python_utils/py_basic/arg_parse.py:
--------------------------------------------------------------------------------
"""
This file shows basic usage of argparse.
"""

import argparse


def build_parser():
    """Return the demo parser: -i/--image required, -w/--weights optional."""
    ap = argparse.ArgumentParser()
    ap.add_argument('-i', '--image', required=True, help='path to image file')
    ap.add_argument('-w', '--weights', default='./cnn_weights.dat',
                    help='path to weights file')
    return ap


def main(argv=None):
    """Parse *argv* (defaults to sys.argv[1:]) and print the parsed options.

    BUGFIX: the original parsed sys.argv at import time, so merely importing
    the module without -i crashed the importing process.

    Returns:
        argparse.Namespace: the parsed arguments.
    """
    args = build_parser().parse_args(argv)
    print(args.image, args.weights)
    return args


if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------
/python_utils/py_basic/MD5_sha.py:
--------------------------------------------------------------------------------
# _*_ coding:utf-8 _*_
# NOTE: the legacy ``md5`` and ``sha`` modules were removed in Python 3 and
# were never used here anyway -- hashlib provides both algorithms.
import hashlib


def test_md5():
    """Print and return the MD5 and SHA-1 hex digests of a demo string.

    Fixes Python-3 incompatibilities: hashlib requires bytes input, and
    ``print`` is now a function.

    Returns:
        tuple: (md5_hexdigest, sha1_hexdigest) of the UTF-8 encoded content.
    """
    content = 'hello xy, are you ok?'.encode('utf-8')
    md5_hex = hashlib.md5(content).hexdigest()
    sha1_hex = hashlib.sha1(content).hexdigest()
    print(md5_hex)
    # 180d5f07d511b660f320cf2a645f1f3b
    print(sha1_hex)
    # c25884a4688c8b1a25a619f198f91f8661b2623b
    return md5_hex, sha1_hex


if __name__ == '__main__':
    test_md5()
    pass
--------------------------------------------------------------------------------
/python_utils/http_basic/flask_web/flask_restful.py:
--------------------------------------------------------------------------------
# coding:utf-8

from flask import Flask
from flask import jsonify


app = Flask(__name__)


@app.route('/')
def index():
    """Root endpoint: plain-text greeting."""
    return "hello world"


@app.route('/idx')
def index_js():
    """JSON endpoint: the greeting wrapped in a one-key object."""
    payload = {"k": "hello world"}
    return jsonify(payload)


if __name__ == '__main__':
    # Listen on all interfaces (default port 5000).
    app.run(host='0.0.0.0')
    pass
--------------------------------------------------------------------------------
/python_utils/py_basic/log_config.py:
--------------------------------------------------------------------------------
# _*_ coding:utf-8 _*_
import logging

# Root-logger configuration: timestamp, level, module:line, function and
# thread name in front of every message.
_FMT = ("%(asctime)s %(levelname)s %(module)s:%(lineno)s "
        "%(funcName)s %(threadName)s %(message)s")
logging.basicConfig(
    format=_FMT,
    level=logging.DEBUG,
    datefmt='%Y-%m-%d %I:%M:%S'
)


def test_log():
    """Emit one INFO record through the configured root logger."""
    logging.info('hello world')

if __name__ == '__main__':
    test_log()
    pass
--------------------------------------------------------------------------------
/python_utils/py_basic/global_val.py:
--------------------------------------------------------------------------------
# _*_coding: utf-8 _*_
"""
test for global variable
note that in multiprocess, every process has its own
global variable, so if the function fun2 is in a subprocess
it will still be 0
"""
g_dst_dir = ''

g_val = 0


def fun2():
    """Print and return the current module-level g_val (reads the global)."""
    print(g_val)
    return g_val


def fun1():
    """Set g_val to 3 via ``global``, then delegate to fun2().

    Returns:
        int: the value fun2() observed (3), so the demo is verifiable.
    """
    global g_val
    g_val = 3
    return fun2()


if __name__ == '__main__':
    fun1()
    pass
--------------------------------------------------------------------------------
/python_utils/http_basic/http_realize/test_SimpleHTTPServer.py:
--------------------------------------------------------------------------------
# coding:utf-8

# NOTE(review): Python-2-only demo -- SimpleHTTPServer was merged into
# http.server in Python 3, and ``print`` is used as a statement.

import SimpleHTTPServer


def test_translate_path():
    # Map a URL path onto a local filesystem path relative to the CWD.
    url = "http://yy.com/ai/xy/"
    # Constructing the handler with (None, None, None) presumably relies on
    # translate_path not touching request/server state; BaseRequestHandler's
    # __init__ normally needs a live socket -- TODO confirm this runs at all.
    handler = SimpleHTTPServer.SimpleHTTPRequestHandler(None, None, None)

    print handler.translate_path(url)


if __name__ == '__main__':

    # ----- test translate path -----
    if 1:
        test_translate_path()
    # ----- end -----

    pass

--------------------------------------------------------------------------------
/python_utils/numpy_operate/structured_arr.py:
--------------------------------------------------------------------------------
# _*_ coding:utf-8 _*_
import numpy as np


def create_structured_arr():
    """Build a structured array and return it sorted by the 'height' field.

    Fixes the Python-2-only ``print`` and returns the sorted array so the
    result can be checked (Galahad 1.7 < Arthur 1.8 < Lancelot 1.9).

    Returns:
        np.ndarray: records of dtype (name: S10, height: float, age: int),
        ascending by height.
    """
    dtype = [('name', 'S10'), ('height', float), ('age', int)]
    arr_val = [('Arthur', 1.8, 41), ('Lancelot', 1.9, 38),
               ('Galahad', 1.7, 38)]

    arr = np.array(arr_val, dtype=dtype)
    sorted_arr = np.sort(arr, order='height')
    print(sorted_arr)
    return sorted_arr


if __name__ == '__main__':
    create_structured_arr()
    pass
--------------------------------------------------------------------------------
/python_utils/distributed/zookeeper_demo/zk_failover_monitor.py:
--------------------------------------------------------------------------------
# Failure-detection monitor: polls ZooKeeper every 3 seconds for the
# ephemeral node created by zk_failover_worker.py and exits once it vanishes.
# NOTE(review): Python-2 print syntax; requires the third-party kazoo package
# and a ZooKeeper server on 127.0.0.1:2181.
from kazoo.client import KazooClient

import time

import logging
logging.basicConfig()

zk = KazooClient(hosts='127.0.0.1:2181')
zk.start()

# Determine if a node exists
while True:
    if zk.exists("/test/failure_detection/worker"):
        print "the worker is alive!"
    else:
        # Ephemeral node gone -> the worker's session died.
        print "the worker is dead!"
        break
    time.sleep(3)

zk.stop()

--------------------------------------------------------------------------------
/python_utils/distributed/zookeeper_demo/zk_watch.py:
--------------------------------------------------------------------------------
# Watch demo: registers a DataWatch on a znode and prints every data change.
# NOTE(review): Python-2 print syntax; requires kazoo and a local ZooKeeper.
from kazoo.client import KazooClient
import time

import logging
logging.basicConfig()

zk = KazooClient(hosts='127.0.0.1:2181')
zk.start()


# Invoked once immediately and again on every change of the watched node.
@zk.DataWatch('/test/zk1/node')
def my_func(data, stat):
    if data:
        print "Data is %s" % data
        print "Version is %s" % stat.version
    else:
        # Node missing or empty -- stat is None in that case.
        print "data is not available"

# Keep the process alive so watch callbacks can fire.
while True:
    time.sleep(10)

# Unreachable (the loop never breaks); kept from the original demo.
zk.stop()

--------------------------------------------------------------------------------
/python_utils/thread_process/sema_thread.py:
--------------------------------------------------------------------------------
import time
from random import random
from threading import Thread, Semaphore

# At most 3 workers may hold the semaphore at once.
sema = Semaphore(3)


def foo(tid, max_wait=2):
    """Acquire the shared semaphore, sleep up to *max_wait* seconds, release.

    The ``with`` block guarantees release even if the body raises.
    """
    with sema:
        print('{} acquire sema'.format(tid))
        wt = random() * max_wait
        time.sleep(wt)
        print('{} release sema'.format(tid))


def main(num_threads=5, max_wait=2):
    """Start *num_threads* foo workers and join them all.

    Improvements over the original flat script: the demo no longer runs at
    import time, Python-2 prints became function calls, and the hard-coded
    thread count / sleep bound are parameters.

    Returns:
        int: the number of threads started and joined.
    """
    threads = []
    for tid in range(num_threads):
        t = Thread(target=foo, args=(tid, max_wait))
        threads.append(t)
        t.start()
    for t in threads:
        t.join()
    return len(threads)


if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------
/python_utils/http_basic/simple_rpc/rpc_client_1.py:
--------------------------------------------------------------------------------
# coding:utf-8

# XML-RPC demo client: connects to rpc_server_1.py on localhost:8888 and
# invokes its remote is_even() twice.
# NOTE(review): Python-2 only -- xmlrpclib became xmlrpc.client in Python 3,
# and ``print`` is used as a statement.

import xmlrpclib


def test_client_1():
    # ServerProxy forwards attribute calls as XML-RPC requests transparently.
    host = "http://localhost:8888/"
    proxy = xmlrpclib.ServerProxy(host)
    print "using proxy %s" % proxy

    print "3 is even %s" % str(proxy.is_even(3))
    print "100 is even %s" % str(proxy.is_even(100))


if __name__ == '__main__':

    # ----- test simple rpc client -----
    if 1:
        test_client_1()
    # ----- end -----

    pass

--------------------------------------------------------------------------------
/python_utils/distributed/zookeeper_demo/zk_failover_worker.py:
--------------------------------------------------------------------------------
# Failure-detection worker: advertises liveness through an EPHEMERAL znode
# that ZooKeeper deletes automatically when this session dies -- which is
# exactly what zk_failover_monitor.py watches for.
# NOTE(review): Python-2 print syntax; requires kazoo and a local ZooKeeper.
from kazoo.client import KazooClient
import time

import logging
logging.basicConfig()

zk = KazooClient(hosts='127.0.0.1:2181')
zk.start()

# Ensure a path, create if necessary
zk.ensure_path("/test/failure_detection")

# Create a node with data
zk.create("/test/failure_detection/worker",
          value=b"a test value", ephemeral=True)

while True:
    print "I am alive!"
    time.sleep(3)

# Unreachable (the loop never breaks); kept from the original demo.
zk.stop()

--------------------------------------------------------------------------------
/python_utils/py_basic/profile_ope.py:
--------------------------------------------------------------------------------
# coding:utf-8

import cProfile

# Python 3 removed xrange (its range is already lazy); alias it so this
# range-vs-xrange profiling demo still runs on both versions.
try:
    xrange
except NameError:
    xrange = range


def func1():
    """Sum 0..999999 via range; returns the total (499999500000).

    Renamed the accumulator from ``sum`` so it no longer shadows the builtin.
    """
    total = 0
    for i in range(1000000):
        total += i
    return total

    # 1 0.167 0.167 0.167 0.167 {range}
    # 4 function calls in 0.674 seconds


def func2():
    """Sum 0..999999 via xrange (lazy on py2); returns the total."""
    total = 0
    for i in xrange(1000000):
        total += i
    return total

    # 3 function calls in 0.350 seconds


if __name__ == '__main__':
    # cProfile.run("func1()")
    cProfile.run("func2()")
    pass
--------------------------------------------------------------------------------
/python_utils/http_basic/flask_web/flask_resp.py:
--------------------------------------------------------------------------------
# coding:utf-8

from flask import Flask
from flask import Response
import json


app = Flask(__name__)


@app.route('/hello', methods=['GET'])
def api_hello():
    """GET /hello -> a JSON greeting plus an extra Link response header."""
    payload = json.dumps({'name': 'xy', 'greet': "hello"})

    response = Response(payload, status=200, mimetype='application/json')
    response.headers['Link'] = 'http://xy.com'
    return response


if __name__ == '__main__':
    app.run(host='0.0.0.0')
    pass
--------------------------------------------------------------------------------
/python_utils/py_basic/with_usage.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 |
3 |
4 | class Sample(object):
5 | def __enter__(self):
6 | print 'in __enter__'
7 | return "Foo"
8 |
9 | def __exit__(self, exc_type, exc_val, exc_tb):
10 | print 'in __exit__'
11 |
12 |
13 | def get_sample():
14 | return Sample()
15 |
16 |
17 | def test_with():
18 | with get_sample() as sp:
19 | print 'Sample: ', sp
20 |
21 | # in __enter__
22 | # Sample: Foo
23 | # in __exit__
24 |
25 | if __name__ == '__main__':
26 | test_with()
27 | pass
28 |
--------------------------------------------------------------------------------
/python_utils/netsocket/basic_socket.py:
--------------------------------------------------------------------------------
# _*_ coding:utf-8 _*_
"""
This file is about some basic operator of socket
"""


import socket


def get_ip(probe_host="gmail.com", probe_port=80):
    """
    Return the LAN IP of this host (not 127.0.0.1).

    socket.gethostbyname(socket.gethostname()) may return 127.0.0.1, so
    instead we "connect" a UDP socket toward an external host (no packet is
    sent by a UDP connect) and read the local address the OS chose.

    Fixes: socket leaked when connect raised (now closed in ``finally``),
    Python-2 print, no return value; probe target is now a parameter.

    :param probe_host: external host used to select the outbound interface
    :param probe_port: port used for the probe connect
    :return: local IP address as a string
    """
    s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    try:
        s.connect((probe_host, probe_port))
        # s.getsockname() -> (ip, port)
        ip = s.getsockname()[0]
        print(ip)
        return ip
    finally:
        s.close()


if __name__ == '__main__':
    get_ip()
--------------------------------------------------------------------------------
/python_utils/py_basic/collection_ope.py:
--------------------------------------------------------------------------------
# _*_ coding:utf-8 _*_

"""
operator about collection
"""

from collections import Counter


def counter_usage():
    """Demonstrate Counter.most_common; returns the most frequent element.

    Fixes: the Counter was rebuilt on every call (now built once), prints
    use Python-3 syntax, and the top element is returned for verification.
    """
    lst = ['class_1', 'class_2', 'class_1', 'class_1', 'class_1', 'class_2']

    counts = Counter(lst)

    print(counts.most_common())
    # [('class_1', 4), ('class_2', 2)]

    print(counts.most_common(1))
    # [('class_1', 4)]

    top = counts.most_common(1)[0][0]
    print(top)
    # class_1
    return top


if __name__ == '__main__':
    counter_usage()
    pass
--------------------------------------------------------------------------------
/python_utils/py_basic/except_ope.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 |
3 |
4 | def test_except_scope():
5 | try:
6 | v = 'test scope'
7 | raise Exception
8 | except Exception as e:
9 | # if v is locals():
10 | # print v
11 | print v
12 |
13 |
14 | def test_except():
15 | try:
16 | raise 7
17 | except Exception as e:
18 | print e
19 | # exceptions must be old-style classes or derived from BaseException, not int
20 |
21 |
22 | if __name__ == '__main__':
23 | # test_except_scope()
24 | test_except()
25 | pass
26 |
--------------------------------------------------------------------------------
/python_utils/http_basic/simple_rpc/rpc_server_1.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 |
3 | from SimpleXMLRPCServer import SimpleXMLRPCServer
4 |
5 |
def is_even(n):
    """Return True when *n* is even.

    BUGFIX: the original returned ``n % 2``, which is 1 (truthy) for ODD
    numbers -- the exact opposite of what the name promises, so the client
    printed "3 is even 1".
    """
    return n % 2 == 0
8 |
9 |
def test_server_1():
    # Start a blocking XML-RPC server on localhost:8888 exposing is_even()
    # under the remote name "is_even"; serve_forever never returns.
    # NOTE(review): Python-2-only (SimpleXMLRPCServer module, print statement).
    port = 8888
    rpc_server = SimpleXMLRPCServer(("localhost", port))
    print 'now listening in %s' % port

    rpc_server.register_function(is_even, "is_even")
    rpc_server.serve_forever()


if __name__ == '__main__':

    # -----test rpc server 1-----
    if 1:
        test_server_1()
    # ----- end -----

    pass

--------------------------------------------------------------------------------
/python_utils/distributed/zookeeper_demo/zk_lock_demo.py:
--------------------------------------------------------------------------------
# _*_ coding:utf-8 _*_

# Distributed-lock demo: each process instance (identified by a random UUID)
# repeatedly acquires a shared ZooKeeper lock, "works" for ~3s, and releases.
# NOTE(review): Python-2 print syntax; requires kazoo and a local ZooKeeper.

from kazoo.client import KazooClient
import time
import uuid
import logging
logging.basicConfig()

# Unique identity for this process; also used as the lock contender name.
my_id = uuid.uuid4()


def work():
    # Critical section -- only the current lock holder runs this.
    print "{} is working! ".format(str(my_id))


zk = KazooClient(hosts='127.0.0.1:2181')
zk.start()

lock = zk.Lock("/lockpath", str(my_id))

print "I am {}".format(str(my_id))

# Compete for the lock forever; sleep inside ``with`` holds it ~3s per round.
while True:
    with lock:
        work()
        time.sleep(3)

# Unreachable (the loop never breaks); kept from the original demo.
zk.stop()

if __name__ == '__main__':

    pass

--------------------------------------------------------------------------------
/python_utils/distributed/zookeeper_demo/zk_master_select.py:
--------------------------------------------------------------------------------
# Leader-election demo: contenders block on zk.Election until one wins, then
# the winner runs leader_func forever.
# NOTE(review): Python-2 print syntax; requires kazoo and a local ZooKeeper.
from kazoo.client import KazooClient
import time
import uuid

import logging
logging.basicConfig()

# Unique identity for this contender in the election.
my_id = uuid.uuid4()


def leader_func():
    # Runs only after this process wins the election; never returns.
    print "I am the leader {}".format(str(my_id))
    while True:
        print "{} is working! ".format(str(my_id))
        time.sleep(3)

zk = KazooClient(hosts='127.0.0.1:2181')
zk.start()

election = zk.Election("/electionpath")

# blocks until the election is won, then calls
# leader_func()
election.run(leader_func)

zk.stop()

--------------------------------------------------------------------------------
/python_utils/sk_sc_pd_operator/pd_str.py:
--------------------------------------------------------------------------------
# coding:utf-8

import numpy as np
import pandas as pd


def series_str():
    """Demonstrate pandas vectorized string methods on an object Series.

    Missing entries (None) propagate through ``.str`` methods instead of
    raising. Fixes the Python-2-only prints and returns both results so
    they can be verified.

    Returns:
        tuple: (capitalized Series, startswith('p') Series).
    """
    data = ['peter', 'Paul', None, 'MARY', 'gUIDO']
    names = pd.Series(data)

    capitalized = names.str.capitalize()
    print(capitalized)
    # 0 Peter
    # 1 Paul
    # 2 None
    # 3 Mary
    # 4 Guido
    # dtype: object

    starts_p = names.str.startswith('p')
    print(starts_p)
    # 0 True
    # 1 False
    # 2 None
    # 3 False
    # 4 False
    # dtype: object
    return capitalized, starts_p


if __name__ == '__main__':
    series_str()
    pass
--------------------------------------------------------------------------------
/python_utils/http_basic/socket_basic/udp_sock/udp_client.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 |
3 | import socket
4 |
5 | """
Test for UDP max send size.
7 | """
8 |
9 |
10 | def udp_client():
11 | host, port = "localhost", 8888
12 |
13 | sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
14 | msg = 'a' * 1024 * 65
15 | msg = 'a' * 65507
16 |
17 | sent = sock.sendto(msg, (host, port))
18 | # sent = sock.sendto(msg, (host, port))
19 |
20 | data, server_add = sock.recvfrom(1024 * 64)
21 | print 'rcv from %s: %s' % (server_add, len(data))
22 |
23 |
if __name__ == '__main__':
    # requires udp_server.py to be listening on localhost:8888
    udp_client()
    pass
27 |
28 |
29 |
--------------------------------------------------------------------------------
/python_utils/sk_sc_pd_operator/sk_KFlod.py:
--------------------------------------------------------------------------------
1 | # coding:utf-8
2 | """
3 | http://scikit-learn.org/stable/modules/cross_validation.html
4 | """
5 |
6 |
7 | import pandas as pd
8 | import numpy as np
9 | from sklearn.model_selection import KFold
10 |
11 |
def kfold_1():
    """Split a 6-row array into 4 folds and print each train/test partition."""
    # 6 samples, 3 columns; with n_splits=4 the first two folds get 2 test
    # rows each and the last two folds get 1 each
    X = np.array([[3, 4, 0], [3, 2, 1], [5, 6, 0],
                  [1, 2, 1], [1, 5, 0], [7, 4, 1]])
    kf = KFold(n_splits=4)
    for train, test in kf.split(X):
        # train/test are index arrays; X[...] selects the matching rows
        print "=======train: %s" % X[train].tolist()
        # print X[train]
        print "=======test : %s" % X[test].tolist()


if __name__ == '__main__':
    kfold_1()
    pass
25 |
--------------------------------------------------------------------------------
/python_utils/http_basic/flask_web/flask_file_svr.py:
--------------------------------------------------------------------------------
1 | # coding:utf-8
2 |
3 | from flask import Flask
4 | from flask import Response
5 |
6 |
7 | app = Flask(__name__)
8 |
9 |
@app.route('/audio/pcm_mp3/')
def stream_mp3():
    """Stream a local MP3 file to the client in 1 KB chunks."""
    def generate():
        # TODO(review): hard-coded local path; parameterize for real use
        path = 'F:/826.mp3'
        with open(path, 'rb') as fmp3:
            data = fmp3.read(1024)
            while data:
                yield data
                data = fmp3.read(1024)

    # Bug fix: "audio/mpeg" is the registered MIME type for MP3;
    # "audio/mpeg3" is not a standard type and confuses some clients.
    return Response(generate(), mimetype="audio/mpeg")
21 |
22 |
if __name__ == '__main__':
    # so the other machine can visit the website by ip
    # (Flask defaults to port 5000)
    app.run(host='0.0.0.0')
    pass
27 |
--------------------------------------------------------------------------------
/python_utils/numpy_operate/arr_vectorize.py:
--------------------------------------------------------------------------------
1 | # coding:utf-8
2 |
3 | import numpy as np
4 |
5 |
def my_fun(a, b):
    """Return a - b when a >= b, otherwise a + b."""
    return a - b if a >= b else a + b
11 |
12 |
def test_vectorize():
    """Demonstrate np.vectorize: apply a scalar function elementwise."""
    # the scalar 4 is broadcast as the second argument of my_fun
    v_fun = np.vectorize(my_fun)
    arr = np.arange(8).reshape(2, 4)
    print arr
    # [[0 1 2 3]
    #  [4 5 6 7]]
    print v_fun(arr, 4)
    # [[4 5 6 7]
    #  [0 1 2 3]]

    # vectorize also accepts a lambda; note it is a convenience loop,
    # not a true NumPy-speed vectorization
    squarer = lambda t: t ** 2
    v_fun = np.vectorize(squarer)
    print v_fun(arr)
    # [[ 0  1  4  9]
    #  [16 25 36 49]]

if __name__ == '__main__':
    test_vectorize()
    pass
32 |
--------------------------------------------------------------------------------
/python_utils/http_basic/socket_basic/udp_sock/udp_server.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 |
3 | import socket
4 |
5 |
def udp_server():
    """Echo server: receive UDP datagrams on localhost:8888 and send them back."""
    host, port = "localhost", 8888

    sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    # NOTE(review): SO_REUSEPORT is not available on every platform
    # (e.g. Windows, older Linux kernels) - confirm the target OS
    sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEPORT, 1)
    sock.bind((host, port))

    # large enough for the biggest possible UDP payload (~64 KB)
    rcv_size = 1024 * 65
    while True:
        data, address = sock.recvfrom(rcv_size)
        print "recv from %s: %s" % (address, len(data))

        if data:
            # echo the datagram back to the sender
            send_size = sock.sendto(data, address)
            print "sendto %s: %s" % (address, send_size)


if __name__ == '__main__':

    udp_server()

    pass
--------------------------------------------------------------------------------
/python_utils/numpy_operate/flip_arr.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 |
3 | import numpy as np
4 |
5 |
def flip_arr():
    """Show np.fliplr / np.flipud and the equivalent slice expressions."""
    arr = np.arange(6).reshape(2, 3)
    print arr
    # [[0 1 2]
    #  [3 4 5]]
    # fliplr reverses column order, same as arr[:, ::-1]
    print np.fliplr(arr)
    # [[2 1 0]
    #  [5 4 3]]
    print arr[:, ::-1]
    # [[2 1 0]
    #  [5 4 3]]

    # flipud reverses row order, same as arr[::-1]
    print np.flipud(arr)
    # [[3 4 5]
    #  [0 1 2]]
    print arr[::-1]
    # [[3 4 5]
    #  [0 1 2]]

    arr2 = np.arange(8).reshape((2, 2, 2))
    print arr2
    # [[[0 1]
    #   [2 3]]
    #
    #  [[4 5]
    #   [6 7]]]


if __name__ == '__main__':
    flip_arr()
    pass
37 |
--------------------------------------------------------------------------------
/python_utils/DbService/mysql_db/DbBase.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 |
3 | import MySQLdb
4 | from utils.JsonUtil import get_json_from_file
5 |
6 |
class DbBase(object):
    """Thin wrapper that opens a MySQL connection from a JSON config file.

    Exposes ``self.conn`` (the MySQLdb connection) and ``self.cursor``.
    """

    def __init__(self, **kwargs):
        # 'db_config_file' is required: path of the JSON connection config
        db_config_file = kwargs['db_config_file']
        self.config_db(db_config_file)

    def config_db(self, db_config_file):
        """Read host/user/pwd/db/port from *db_config_file* and connect."""
        data = get_json_from_file(db_config_file)
        host = data['host']
        user = data['user']
        pwd = data['pwd']
        db = data['db']
        port = data['port']

        # utf8 + use_unicode so query results come back as unicode strings
        self.conn = MySQLdb.connect(host=host, port=port, user=user, passwd=pwd, db=db, charset="utf8", use_unicode=True)
        self.cursor = self.conn.cursor()
22 |
--------------------------------------------------------------------------------
/python_utils/py_basic/argparse_ope.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 |
3 | """
4 | this file is demo about basic args parse usage
5 | """
6 |
7 | import argparse
8 |
9 |
def args_num_sum(argv=None):
    """Accumulate integer command-line arguments with max (default) or sum.

    Generalized: an explicit argument list can be supplied for testing;
    None keeps the original behaviour of reading sys.argv.

    :param argv: optional list of argument strings; defaults to sys.argv[1:]
    :return: the accumulated value (also printed)
    """
    parser = argparse.ArgumentParser(description="process some integers")

    parser.add_argument('integers', metavar='N', type=int, nargs='+',
                        help='an integer for accumulator')

    # --sum switches the accumulator from max to sum
    parser.add_argument('--sum', dest='accumulate', action='store_const',
                        const=sum, default=max, help='sum the integers (default: find the max)')

    args = parser.parse_args(argv)
    result = args.accumulate(args.integers)
    # print(x) with a single argument behaves identically in Python 2 and 3
    print(result)
    return result
21 |
22 |
if __name__ == '__main__':
    # e.g. `python argparse_ope.py 1 2 3` -> 3; `... --sum 1 2 3` -> 6
    args_num_sum()
    pass
26 |
--------------------------------------------------------------------------------
/python_utils/http_basic/flask_web/flask_auth.py:
--------------------------------------------------------------------------------
1 | # coding:utf-8
2 | """
3 | cited https://github.com/miguelgrinberg/Flask-HTTPAuth
4 | """
5 |
6 | from flask import Flask
7 | from flask import request
8 | from flask_httpauth import HTTPBasicAuth
9 |
10 |
11 | app = Flask(__name__)
12 | auth = HTTPBasicAuth()
13 |
14 | users = {"xy1": "11111", "xy2": "22222"}
15 |
16 |
@auth.get_password
def get_pw(user_name):
    """Return the stored password for *user_name*, or None when unknown."""
    # dict.get already yields None for missing keys, which is exactly the
    # "no such user" contract HTTPBasicAuth expects
    return users.get(user_name)
23 |
24 |
@app.route('/')
@auth.login_required
def index():
    """Protected root page; greets the authenticated user."""
    # request.authorization carries the parsed Basic-Auth credentials
    print request.authorization
    return "hello, %s" % auth.username()


if __name__ == '__main__':
    # bind to all interfaces so other machines can reach the server
    app.run(host='0.0.0.0')
    pass
35 |
--------------------------------------------------------------------------------
/python_utils/machine_learn/PCA/pca_basic.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 | import numpy as np
3 |
4 |
def get_eigen_vector():
    """Print the eigenvalues and eigenvectors of a symmetric 2x2 matrix."""
    mt = np.array([[3, -1], [-1, 3]])
    # eig returns (eigenvalues, matrix whose columns are the eigenvectors)
    eig_val, eig_vec = np.linalg.eig(mt)
    print eig_val
    print eig_vec
10 |
11 |
def get_mean():
    """Compare row means, column means and the grand mean of a 2x2 matrix."""
    mt = np.array([[3, 1], [-1, 3]])
    # per-row means
    m_1 = np.mean(mt[0, :])
    m_2 = np.mean(mt[1, :])
    # grand mean over every element
    m = np.mean(mt)
    print m_1
    print m_2
    print m

    # per-column means
    c_1 = np.mean(mt[:, 0])
    c_2 = np.mean(mt[:, 1])
    print c_1
    print c_2
    # axis=0 averages down the rows, i.e. gives per-column means
    m = np.mean(mt, axis=0)
    # m = np.mean(mt, axis=(0, 1))
    print m

if __name__ == '__main__':
    # get_eigen_vector()
    get_mean()
    pass
33 |
--------------------------------------------------------------------------------
/python_utils/http_basic/http_realize/http_svr_simple.py:
--------------------------------------------------------------------------------
1 | # _*_coding:utf-8 _*_
2 |
3 | import BaseHTTPServer
4 |
5 |
class RequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):
    """Minimal handler answering every GET with a fixed HTML page."""

    # NOTE(review): the page body appears to have lost its HTML tags during
    # export; Content-Length is computed from the literal, so the response
    # is still self-consistent as written.
    Page = '''


Hello World


'''

    def do_GET(self):
        """Serve the static Page with a correct Content-Length header."""
        self.send_response(200)
        self.send_header("Content-type", "text/html")
        self.send_header("Content-Length", str(len(self.Page)))
        self.end_headers()
        self.wfile.write(self.Page)
22 |
23 |
if __name__ == '__main__':
    # listen on all interfaces, port 8888, until interrupted
    serverAddress = ('', 8888)
    server = BaseHTTPServer.HTTPServer(serverAddress, RequestHandler)
    server.serve_forever()
28 |
--------------------------------------------------------------------------------
/python_utils/http_basic/socket_basic/SocketServer_basic.py:
--------------------------------------------------------------------------------
1 | # coding:utf-8
2 | from SocketServer import TCPServer, StreamRequestHandler
3 |
# Canned HTTP/1.1 response. The body "<h1>Hello!</h1>" is exactly 15 bytes,
# matching the Content-length header; the replace() converts the literal's
# LF line endings to the CRLF endings HTTP requires.
# Bug fix: the original body had its HTML tags stripped during export
# (leaving "Hello!", only 6 bytes); the 15-byte tagged body is restored.
RESPONSE = b"""\
HTTP/1.1 200 OK
Content-type: text/html
Content-length: 15

<h1>Hello!</h1>""".replace(b"\n", b"\r\n")
10 |
11 |
class MyHandler1(StreamRequestHandler):
    """Handle one TCP connection by sending back a canned HTTP response."""
    def handle(self):
        # getpeername() -> (host, port) of the connected client
        addr = self.request.getpeername()
        print 'get connection from %s, %s ' % addr
        self.wfile.write(RESPONSE)
18 |
19 |
def test_handler1():
    """Serve HTTP-over-raw-TCP forever on port 8888 using MyHandler1."""
    # http server
    server = TCPServer(('', 8888), MyHandler1)
    server.serve_forever()


if __name__ == '__main__':
    test_handler1()
    pass
29 |
--------------------------------------------------------------------------------
/python_utils/http_basic/flask_web/flask_error_handler.py:
--------------------------------------------------------------------------------
1 | # coding:utf-8
2 |
3 |
4 | from flask import Flask
5 | from flask import request
6 | from flask import jsonify
7 |
8 | app = Flask(__name__)
9 |
10 |
@app.errorhandler(404)
def not_found(error=None):
    """Build a JSON 404 response; also callable directly from view code."""
    msg = {'status': 404, 'message': 'Not Found: ' + request.url}

    resp = jsonify(msg)
    # jsonify defaults to status 200, so 404 must be set explicitly
    resp.status_code = 404

    return resp
19 |
20 |
# Bug fix: the route lost its "<userid>" variable rule during export; Flask
# cannot pass the userid argument without it.
@app.route('/users/<userid>', methods=['GET'])
def api_users(userid):
    """Return the user with *userid* as JSON, or the JSON 404 body."""
    users = {'1': 'xy1', '2': 'xy2', '3': 'xy3'}

    # userid arrives as a string, matching the dict's string keys
    if userid in users:
        return jsonify({userid: users[userid]})
    else:
        return not_found()
29 |
if __name__ == '__main__':
    # bind to all interfaces so other machines can reach the server
    app.run(host='0.0.0.0')

    pass
34 |
--------------------------------------------------------------------------------
/python_utils/distributed/zookeeper_demo/zk_node_ope.py:
--------------------------------------------------------------------------------
1 | from kazoo.client import KazooClient
2 |
3 | import logging
4 | logging.basicConfig()
5 |
# connect to a local standalone ZooKeeper and demonstrate basic node CRUD
zk = KazooClient(hosts='127.0.0.1:2181')
zk.start()

# Ensure a path, create if necessary
zk.ensure_path("/test/zk1")

# Create a node with data
# NOTE(review): create() raises NodeExistsError when this script is re-run;
# consider checking existence first or using a sequence node
zk.create("/test/zk1/node", b"a test value11")

# Determine if a node exists
if zk.exists("/test/zk1"):
    print "the node exist"

# Print the version of a node and its data
data, stat = zk.get("/test/zk1")
print("Version: %s, data: %s" % (stat.version, data.decode("utf-8")))

# List the children
children = zk.get_children("/test/zk1")
print("There are %s children with names %s" % (len(children), children))

zk.stop()
28 |
--------------------------------------------------------------------------------
/python_utils/thread_process/basic_thread.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 | """
3 | This file is about basic operator about thread
4 | """
5 |
6 | import threading
7 | import logging
8 | logging.basicConfig(level=logging.DEBUG,
9 | format='%(asctime)s %(threadName)s %(message)s',
10 | datefmt='%Y-%m-%d %I:%M:%S')
11 |
12 |
def join_all_others_thread():
    """
    Block the calling thread until every other live thread has finished.
    :return: None
    """
    logging.debug('now join all the other threads')
    current = threading.currentThread()
    others = [t for t in threading.enumerate() if t is not current]
    for t in others:
        t.join()

    # this message appears only after every other thread has completed
    logging.debug('join all the other threads success')
26 |
--------------------------------------------------------------------------------
/python_utils/py_basic/tuple_operate.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 |
3 |
def list_2_tuple():
    """Convert a mixed list to a tuple, turning nested lists into tuples.

    :return: ('python', 12, True, (1, 2))
    """
    lst = ['python', 12, True, [1, 2]]
    tp = tuple(lst)
    print(tp)
    # ('python', 12, True, [1, 2])

    # Bug fix: the original applied tuple() to every element, which raises
    # TypeError on non-iterables such as 12 and True; convert lists only.
    tp = tuple(tuple(item) if isinstance(item, list) else item for item in lst)
    print(tp)
    # ('python', 12, True, (1, 2))
    return tp
11 |
12 |
def tuple_shift_left(tup, n):
    """
    Rotate *tup* left by *n* positions (n may exceed len(tup)).
    :param tup: like (1,2,3,4)
    :param n: non-negative shift count, e.g. 1
    :return: (2, 3, 4, 1)
    :raises ValueError: if n is negative
    """
    if n < 0:
        raise ValueError('n must be a positive integer')
    # empty tuple or zero shift: nothing to do
    if not tup or not n:
        return tup
    offset = n % len(tup)
    return tup[offset:] + tup[:offset]
26 |
if __name__ == '__main__':
    # list_2_tuple()
    tp = (1, 2, 3, 4)
    # print tuple_shift_left(tp, -1)
    # (2, 3, 4, 1)
    # n larger than len(tp) wraps around: 5 % 4 == 1
    print tuple_shift_left(tp, 5)
    # (2, 3, 4, 1)
34 |
--------------------------------------------------------------------------------
/python_utils/sk_sc_pd_operator/pd_plot.py:
--------------------------------------------------------------------------------
1 | # coding:utf-8
2 |
3 | import matplotlib.pyplot as plt
4 | import pandas as pd
5 | import numpy as np
6 | import seaborn as sns
7 | sns.set()
8 |
9 |
def hist_1():
    """Plot per-label histograms of random integer values in a 1x3 grid."""
    # 1000 rows: a label in {0,1,2} and an integer value in [0, 10)
    d = {"label": np.random.choice([0, 1, 2], size=1000),
         "values": np.random.randint(0, 10, size=1000)}

    df = pd.DataFrame(d)

    # df['label'].plot.hist(orientation='horizontal', cumulative=True)
    # one subplot per label; shared axes make the panels comparable
    fig, axes = plt.subplots(nrows=1, ncols=3, sharex=True, sharey=True)
    df.hist(column="values", by="label", ax=axes)
    plt.suptitle('Your Title Here', x=0.5, y=1.05, ha='center', fontsize='xx-large')
    # shared axis labels drawn on the figure rather than each subplot
    fig.text(0.5, 0.04, 'common X', ha='center')
    fig.text(0.04, 0.5, 'common Y', va='center', rotation='vertical')
    plt.show()

if __name__ == '__main__':
    hist_1()
    pass
27 |
--------------------------------------------------------------------------------
/python_utils/py_basic/num_ope.py:
--------------------------------------------------------------------------------
1 | # _*_coding:utf-8 _*_
2 | """
3 | float decimal points
4 | https://stackoverflow.com/questions/455612/limiting-floats-to-two-decimal-points
5 | """
6 |
7 |
def round_float():
    """Compare ways of limiting a float to two decimal places."""
    f = 3.1415
    # round() returns a float, not a string
    print round(f, 2)
    # 3.14

    a = 13.95
    print a
    # 13.95

    # %-formatting rounds while converting to a string
    print "%.2f" % a
    # 13.95

    print "%.2f" % 13.9499999
    # 13.95

    # format() behaves like %-formatting here
    a = 13.949999999999999
    print format(a, '.2f')
    # 13.95
27 |
def or_shift():
    """Demonstrate bit shifting and bitwise OR."""
    print 1 >> 1, 1 << 1  # 0 2
    print 1 | 4, 1 | 2  # 5 3


def print_binary():
    """Print binary representations via str.format and bin()."""
    m, n = 5, -5
    # '{0:b}' formats the magnitude with a leading '-' for negatives
    print '{0:b}'.format(m)
    print '{0:b}'.format(n)

    # bin() adds the '0b' prefix
    print bin(m), bin(n)


if __name__ == '__main__':
    print_binary()
    # round_float()
    pass
45 |
--------------------------------------------------------------------------------
/python_utils/numpy_operate/zero_one_empty.py:
--------------------------------------------------------------------------------
1 | # coding:utf-8
2 |
3 | import numpy as np
4 |
5 |
def empty_test():
    """Show that np.empty/np.empty_like return uninitialized (garbage) memory."""
    arr = np.array([[2.5, 3, 1], [1.1, -2, 3]])
    print arr

    # same shape/dtype as arr, contents are whatever was in memory
    print np.empty_like(arr)

    print np.empty([2, 2])
    # [[2.02554939e-316 2.50034710e-315]
    #  [1.97872580e-316 2.00283462e-316]]

    print np.empty([2, 2], dtype=int)
    # [[58157000 0]
    #  [58157064 0]]
17 | # [[58157000 0]
18 | # [58157064 0]]
19 |
20 |
def zero_test():
    """Demonstrate np.zeros and np.zeros_like with various shapes/dtypes."""
    print(np.zeros(5))
    # [ 0.  0.  0.  0.  0.]
    # Bug fix: np.int was deprecated (NumPy 1.20) and removed (NumPy 1.24);
    # the builtin int is exactly what np.int aliased.
    print(np.zeros((5,), dtype=int))
    # [0 0 0 0 0]
    print(np.zeros((2, 1)))
    # [[ 0.]
    #  [ 0.]]
    x = np.arange(6).reshape((2, 3))
    # zeros_like copies x's shape and integer dtype
    print(np.zeros_like(x))
    # [[0 0 0]
    #  [0 0 0]]
33 |
if __name__ == '__main__':
    # empty_test()
    zero_test()
    pass
38 |
--------------------------------------------------------------------------------
/python_utils/opencv_basic/url_img_cv.py:
--------------------------------------------------------------------------------
1 | import urllib
2 | import cv2
3 | import numpy as np
4 |
5 |
def url_img_cv():
    """Download an image over HTTP and display it with OpenCV."""
    url = "http://yysnapshot.bs2src9.yy.com/636be6fc25410c5208d4c4ba5a22e2365768ec52?height=960&interval=12465&file=636be6fc25410c5208d4c4ba5a22e2365768ec52&width=544&bucket=yysnapshot&yid=7399736121338363914&day=20170817"
    try:
        # fetch the raw bytes and let OpenCV decode them in memory
        url_response = urllib.urlopen(url)
        img_array = np.array(bytearray(url_response.read()), dtype=np.uint8)
        # -1 (IMREAD_UNCHANGED) keeps the alpha channel if present
        img = cv2.imdecode(img_array, -1)
        cv2.imshow('URL Image', img)
        cv2.waitKey()
    except Exception, e:
        print e
    finally:
        print 'no use line, nothing to be cleared'
        # can't return None in this scope, because this file is certainly to be executed
        # return None

if __name__ == '__main__':
    url_img_cv()
23 |
--------------------------------------------------------------------------------
/python_utils/DbService/sqlalchemy/orm.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 |
3 | """
4 | This file is about approaches of orm(object relational mapper)
5 | in sql alchemy
6 | """
7 |
8 | from basic import get_db_session
9 | from sqlalchemy import Column, String, Integer
10 | from sqlalchemy.ext.declarative import declarative_base
11 | from sqlalchemy.ext.hybrid import hybrid_property
12 | Base = declarative_base()
13 |
14 |
class User(Base):
    """ORM mapping for the `user` table."""

    __tablename__ = 'user'

    id = Column(Integer, primary_key=True)
    nickname = Column(String)
    password = Column(String)

    @hybrid_property
    def name_pwd(self):
        """Computed attribute: nickname and password joined by a space."""
        # NOTE(review): raises TypeError if either column is NULL (None) -
        # confirm both are non-nullable in the schema
        return self.nickname + " " + self.password
25 |
26 |
def query_orm():
    """Fetch the first user row and print its combined name/password."""
    session = get_db_session()
    user = session.query(User).first()
    # NOTE(review): .first() returns None on an empty table, which would
    # make the attribute access below fail
    print user.name_pwd

if __name__ == '__main__':
    query_orm()
34 |
--------------------------------------------------------------------------------
/python_utils/http_basic/flask_web/flask_basic_web.py:
--------------------------------------------------------------------------------
1 | # _*_coding:utf-8 _*_
2 |
3 | from flask import Flask
4 | from flask import abort
5 | from flask import redirect
6 | from flask import request
7 | from flask import Response
8 |
9 |
10 | app = Flask(__name__)
11 |
12 |
@app.route('/')
def index():
    """Root page with a fixed plain-text body."""
    return 'from win10 slow machine'
16 |
17 |
# Bug fix: the route lost its "<name>" variable rule during export; Flask
# cannot supply the `name` argument without it.
@app.route('/user/<name>')
def say_hello(name):
    """Greet *name* taken from the URL path."""
    # NOTE(review): the original return literal was corrupted in export (an
    # HTML tag such as <br/> appears stripped); restored as plain text.
    return 'hello, %s' % name
21 |
22 |
23 | @app.route('/paras/')
24 | def multi_paras():
25 | ret_str = ''
26 | for para in request.args:
27 | print para, request.args[para]
28 | ret_str += para
29 |
30 | return 'multi_paras, %s
' % ret_str
31 |
32 |
if __name__ == '__main__':
    # app.run(debug=True)
    # so the other machine can visit the website by ip
    app.run(host='0.0.0.0', port=5000)

    pass
39 |
--------------------------------------------------------------------------------
/python_utils/machine_learn/linear_regression/sk_example.py:
--------------------------------------------------------------------------------
1 | # coding:utf-8
2 |
3 | import matplotlib.pyplot as plt
4 | import seaborn as sns
5 | import numpy as np
6 | from sklearn.linear_model import LinearRegression
7 | sns.set()
8 |
9 |
def get_data():
    """Generate 50 noisy samples from the line y = 2x - 5 (fixed seed 1).

    :return: (x, y) arrays of shape (50,)
    """
    rng = np.random.RandomState(1)
    x = rng.rand(50) * 10
    y = x * 2 - 5 + rng.randn(50)
    # plt.scatter(x, y)
    # plt.show()
    return x, y
17 |
18 |
def lr_fit():
    """Fit a 1-D linear regression to the generated data and plot the fit."""
    x, y = get_data()
    model = LinearRegression(fit_intercept=True)
    # sklearn expects a 2-D feature matrix, hence the newaxis reshape
    model.fit(x[:, np.newaxis], y)
    xfit = np.linspace(0, 10, 1000)
    yfit = model.predict(xfit[:, np.newaxis])

    # should recover slope ~2 and intercept ~-5 (see get_data)
    print "Model slope: ", model.coef_[0]
    print "Model intercept:", model.intercept_

    plt.scatter(x, y)
    plt.plot(xfit, yfit)
    plt.show()

if __name__ == '__main__':
    lr_fit()
    # get_data()
    pass
37 |
--------------------------------------------------------------------------------
/python_utils/machine_learn/neural_network_keras/lstm_nlp.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 |
3 | """
4 | This file is about using lstm to do nlp problem,
5 | classify sentiment of movie review
6 | """
7 | import numpy as np
8 | from keras.datasets import imdb
9 | from keras.models import Sequential
10 | from keras.layers import Dense, LSTM
11 | from keras.layers.embeddings import Embedding
12 | from keras.preprocessing import sequence
13 |
14 |
def generate_data():
    """Load the IMDB sentiment dataset and pad reviews to a fixed length.

    :return: ((X_train, y_train), (X_test, y_test)) with each review
        truncated/zero-padded to 500 token ids.
    """
    # load the dataset but only keep the top n words, zero the rest
    top_words = 5000
    (X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=top_words)
    # truncate and pad input sequences
    max_review_length = 500
    X_train = sequence.pad_sequences(X_train, maxlen=max_review_length)
    X_test = sequence.pad_sequences(X_test, maxlen=max_review_length)
    # Bug fix: the original computed these arrays and discarded them;
    # return them so callers can actually build a model.
    return (X_train, y_train), (X_test, y_test)
23 |
24 |
if __name__ == '__main__':
    # downloads the IMDB dataset on first run
    generate_data()
    pass
28 |
--------------------------------------------------------------------------------
/python_utils/matplot/plot_accuracy_loss.py:
--------------------------------------------------------------------------------
1 | # coding:utf-8
2 |
3 | import numpy as np
4 | import matplotlib.pyplot as plt
5 |
6 |
def draw_result(lst_iter, lst_loss, lst_acc, title):
    """Plot loss (blue) and accuracy (red) curves and save them as <title>.png.

    :param lst_iter: x values (iteration numbers)
    :param lst_loss: loss value per iteration
    :param lst_acc: accuracy value per iteration
    :param title: plot title, also used as the output file name
    """
    plt.plot(lst_iter, lst_loss, '-b', label='loss')
    plt.plot(lst_iter, lst_acc, '-r', label='accuracy')

    plt.xlabel("n iteration")
    plt.legend(loc='upper left')
    plt.title(title)
    plt.savefig(title+".png")  # must come before show(), which clears the figure

    plt.show()
17 |
18 |
def test_draw():
    """Drive draw_result with two synthetic quadratic curves."""
    lst_iter = range(100)
    lst_loss = [0.01 * i - 0.01 * i ** 2 for i in xrange(100)]
    # lst_loss = np.random.randn(1, 100).reshape((100, ))
    lst_acc = [0.01 * i + 0.01 * i ** 2 for i in xrange(100)]
    # lst_acc = np.random.randn(1, 100).reshape((100, ))
    draw_result(lst_iter, lst_loss, lst_acc, "sgd_method")


if __name__ == '__main__':
    test_draw()
    pass
31 |
--------------------------------------------------------------------------------
/python_utils/py_basic/argv_basic.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 |
3 | import sys
4 | import logging
5 |
6 |
def parse_js():
    """Parse alarm fields from sys.argv into a dict (demo; result is discarded).

    Expected argv layout: <script> <id> <sid> <pre> <msg words...>.
    """
    kv = {}
    # NOTE(review): this loop blocks until stdin reaches EOF before any
    # argv parsing happens - confirm that is intended
    for line in sys.stdin:
        print line

    if len(sys.argv) > 4:
        try:
            print sys.argv[1]
            # NOTE(review): `id` shadows the builtin of the same name
            id = int(sys.argv[1])
            sid = int(sys.argv[2])
            pre = int(sys.argv[3])
        except ValueError, e:
            logging.error("Can't convert id or sid to int")
            sys.exit()
        else:
            # everything from argv[4] onward is joined into the message text
            msg = sys.argv[4]
            for arg in sys.argv[5:]:
                msg += " "
                msg += arg
            kv["id"] = id
            kv["alarm"] = 1
            # strip single quotes so the message is safe to embed
            kv["msg"] = msg.replace("'", "")

if __name__ == '__main__':
    # config script para
    print sys.argv
    # ['E:/git_code/python_utils/py_basic/argv_basic.py', '1', '2', '3']
    print len(sys.argv)
    # 4
    # parse_js()
37 |
--------------------------------------------------------------------------------
/python_utils/py_basic/decorator_basic.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 |
3 | """
4 | python @ usage example
5 | """
6 |
7 |
def hello(fn):
    """Decorator: print a greeting before and a farewell after calling *fn*."""
    def wrapper():
        print "hello, %s" % fn.__name__
        fn()
        print 'bye, %s' % fn.__name__
    return wrapper


def do_nothing(fn):
    """Decorator that swallows *fn* entirely: the wrapper never calls it."""
    def wrapper():
        print 'do not exe fn'
    return wrapper


@hello
def foo():
    print 'I am foo'
    # calling foo() prints:
    # hello, foo
    # I am foo
    # bye, foo


@do_nothing
def foo_nothing():
    print 'I am foo_nothing'
    # do not exe fn


# decorators apply bottom-up: hello wraps foo_nested first, then do_nothing
# wraps that result - so nothing inside ever runs
@do_nothing
@hello
def foo_nested():
    print 'I am foo_nested'
    # do not exe fn


# here do_nothing wraps first, so hello greets do_nothing's inner function
# (named 'wrapper') and the original body never runs
@hello
@do_nothing
def foo_nested_v2():
    print 'I am foo_nested_v2'


if __name__ == '__main__':
    # foo()
    # foo_nothing()
    # foo_nested()
    foo_nested_v2()
    pass
55 |
--------------------------------------------------------------------------------
/python_utils/numpy_operate/broadcast_demo.py:
--------------------------------------------------------------------------------
1 | # coding:utf-8
2 |
3 | import numpy as np
4 |
5 |
def broadcast_demo():
    """Show scalar broadcasting and the np.broadcast helper object."""
    arr = np.ones((3, 4))
    print arr
    # the scalar 1 is broadcast across every element
    print arr + 1

    # np.broadcast reports the shape the operands broadcast to
    b = np.broadcast(arr, 1)
    print b.shape
    # (3L, 4L)


def bc_demo_2():
    """Broadcasting between arrays, scalars and mismatched shapes."""
    a = np.array([1.0, 2.0, 3])
    b = np.ones(3) * 2
    print b
    # [ 2.  2.  2.]
    print a * b
    # [ 2.  4.  6.]

    # a scalar broadcasts the same way as an equal-shaped array
    b = 2
    print a * b
    # [ 2.  4.  6.]

    # (4, 1) + (5,) broadcasts to (4, 5)
    x = np.arange(4).reshape(4, 1)
    print x
    # [[0]
    #  [1]
    #  [2]
    #  [3]]
    y = np.ones(5)
    print y
    # [ 1.  1.  1.  1.  1.]
    z = x + y
    print z
    # [[ 1.  1.  1.  1.  1.]
    #  [ 2.  2.  2.  2.  2.]
    #  [ 3.  3.  3.  3.  3.]
    #  [ 4.  4.  4.  4.  4.]]
    print z.shape
    # (4L, 5L)

if __name__ == '__main__':
    # broadcast_demo()
    bc_demo_2()
    pass
50 |
--------------------------------------------------------------------------------
/python_utils/machine_learn/decision_tree/tree.dot:
--------------------------------------------------------------------------------
1 | digraph Tree {
2 | node [shape=box] ;
3 | 0 [label="loan level <= 0.5\ngini = 0.48\nsamples = 15\nvalue = [6, 9]"] ;
4 | 1 [label="has work <= 0.5\ngini = 0.32\nsamples = 5\nvalue = [4, 1]"] ;
5 | 0 -> 1 [labeldistance=2.5, labelangle=45, headlabel="True"] ;
6 | 2 [label="gini = 0.0\nsamples = 4\nvalue = [4, 0]"] ;
7 | 1 -> 2 ;
8 | 3 [label="gini = 0.0\nsamples = 1\nvalue = [0, 1]"] ;
9 | 1 -> 3 ;
10 | 4 [label="own house <= 0.5\ngini = 0.32\nsamples = 10\nvalue = [2, 8]"] ;
11 | 0 -> 4 [labeldistance=2.5, labelangle=-45, headlabel="False"] ;
12 | 5 [label="age <= 1.5\ngini = 0.4444\nsamples = 6\nvalue = [2, 4]"] ;
13 | 4 -> 5 ;
14 | 6 [label="has work <= 0.5\ngini = 0.4444\nsamples = 3\nvalue = [2, 1]"] ;
15 | 5 -> 6 ;
16 | 7 [label="gini = 0.0\nsamples = 2\nvalue = [2, 0]"] ;
17 | 6 -> 7 ;
18 | 8 [label="gini = 0.0\nsamples = 1\nvalue = [0, 1]"] ;
19 | 6 -> 8 ;
20 | 9 [label="gini = 0.0\nsamples = 3\nvalue = [0, 3]"] ;
21 | 5 -> 9 ;
22 | 10 [label="gini = 0.0\nsamples = 4\nvalue = [0, 4]"] ;
23 | 4 -> 10 ;
24 | }
--------------------------------------------------------------------------------
/python_utils/DbService/sqlalchemy/basic.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 |
3 | from sqlalchemy import create_engine
4 | from sqlalchemy.orm import sessionmaker
5 | from utils.JsonUtil import get_json_from_file
6 |
7 |
def get_db_session():
    """Build a SQLAlchemy session from the JSON connection config.

    :return: a new Session bound to a MySQL engine.
    """
    db_config_file = '../config/mysql_config.json'
    db_js_data = get_json_from_file(db_config_file)
    # config keys user/pwd/host/db are spliced into the connection URL
    db_connect = 'mysql+mysqldb://{user}:{pwd}@{host}/{db}?charset=utf8'.format(**db_js_data)
    print db_connect
    # mysql+mysqldb://root:123@localhost/springdemo?charset=utf8
    # echo=True logs every emitted SQL statement
    engine = create_engine(db_connect, echo=True)
    session = sessionmaker(bind=engine)
    return session()
17 |
18 |
def query_example():
    """Run two raw SQL queries through the session and print the results."""
    session = get_db_session()
    print session.execute('show databases').fetchall()
    # [(u'springdemo',), (u'test',), (u'world',)]
    print session.execute('select * from tb_yylive_news where id = 1').first()
    # (1L, u'http://www.bbc.com')

if __name__ == '__main__':
    # get_db_session()
    query_example()
29 |
--------------------------------------------------------------------------------
/python_utils/machine_learn/cluster/sk_cluster.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 |
3 | """
4 | https://jakevdp.github.io/PythonDataScienceHandbook/05.11-k-means.html
5 | """
6 |
7 | from sklearn.cluster import KMeans
8 | from sklearn.datasets.samples_generator import make_blobs
9 | import matplotlib.pyplot as plt
10 | import seaborn as sns
11 | sns.set()
12 | import numpy as np
13 |
14 |
def get_data():
    """Generate 300 points in 4 Gaussian blobs (fixed seed) and scatter them.

    :return: the (300, 2) coordinate array; the true labels are discarded.
    """
    x, y_true = make_blobs(n_samples=300, centers=4,
                           cluster_std=0.60, random_state=0)
    plt.scatter(x[:, 0], x[:, 1], s=50)
    # plt.show()
    return x
21 |
22 |
def predict():
    """Cluster the blob data with KMeans(k=4) and plot points plus centers."""
    x = get_data()
    kmeans = KMeans(n_clusters=4)
    kmeans.fit(x)
    y_kmeans = kmeans.predict(x)
    # print y_kmeans
    # colour each point by its assigned cluster
    plt.scatter(x[:, 0], x[:, 1], c=y_kmeans, s=50, cmap='viridis')
    centers = kmeans.cluster_centers_
    print centers
    # cluster centres drawn as large translucent black dots
    plt.scatter(centers[:, 0], centers[:, 1], c='black', s=200, alpha=0.5)
    plt.show()

if __name__ == '__main__':
    # get_data()
    predict()
    pass
39 |
--------------------------------------------------------------------------------
/python_utils/http_basic/http_realize/http_svr_basic_1.py:
--------------------------------------------------------------------------------
1 | # coding:utf-8
2 |
3 | import socket
4 | import BaseHTTPServer
5 | import time
6 |
7 |
8 | HOST = "127.0.0.1"
9 | PORT = 8888
10 |
# Canned HTTP/1.1 response; the body "<h1>Hello!</h1>" is exactly 15 bytes,
# matching Content-length, and replace() converts to the CRLF line endings
# HTTP requires. Bug fix: the body's HTML tags were stripped during export.
RESPONSE = b"""\
HTTP/1.1 200 OK
Content-type: text/html
Content-length: 15

<h1>Hello!</h1>""".replace(b"\n", b"\r\n")


# deliberately overridden below for the send-buffer experiment: 1 KB of 'a'
RESPONSE = 'a' * (1024 * 1)
20 |
21 |
def test_simple():
    """Accept TCP connections; per connection: wait 12 s, read once, send RESPONSE."""
    server_sock = socket.socket()
    # allow quick restarts without "address already in use"
    server_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    server_sock.bind((HOST, PORT))
    # backlog 0: no queued connections beyond the one being accepted
    server_sock.listen(0)
    print "Listening on %s:%s..." % (HOST, PORT)

    while 1:
        client_sock, client_addr = server_sock.accept()
        print "New connection from %s:%s." % (client_addr)
        # client_sock.sendall(RESPONSE)
        # sleep so client-side timeout / buffering behaviour can be observed
        time.sleep(12)

        data = client_sock.recv(1024)
        print "recv :%s" % data

        # send() may transmit fewer bytes than len(RESPONSE); n is the count
        n = client_sock.send(RESPONSE)
        print "just send %s bytes" % n


if __name__ == '__main__':
    test_simple()
    pass
45 |
--------------------------------------------------------------------------------
/python_utils/sk_sc_pd_operator/sk_metric_accuracy.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 |
3 | import numpy as np
4 | from sklearn.metrics import accuracy_score, confusion_matrix
5 | from sklearn.metrics import classification_report
6 |
7 |
def accuracy_score_demo():
    """Demonstrate accuracy_score and confusion_matrix on toy labels."""
    y_true = [0, 1, 2, 3, 2, 6]
    y_pred = [0, 2, 1, 3, 4, 7]
    print accuracy_score(y_true, y_pred)
    # 0.5
    print confusion_matrix(y_true, y_pred)

    y_true = [0, 1, 0, 1, 1, 0, 1, 0, 1]
    y_pred = [0, 0, 1, 0, 0, 0, 1, 1, 0]
    print confusion_matrix(y_true, y_pred)
    # [[2 2]  of the four 0s, two were predicted as 0 and two as 1
    #  [4 1]]  of the five 1s, four were predicted as 0 and one as 1


def classification_report_demo():
    """Print per-class precision/recall/f1 plus the confusion matrix."""
    y_pred = [0, 0, 2, 1, 0]
    y_true = [0, 1, 2, 2, 0]
    target_names = ['class 0', 'class 1', 'class 2']
    print classification_report(y_true, y_pred, target_names=target_names)

    # rows = true class, columns = predicted class
    print confusion_matrix(y_true, y_pred)
    # [[2 0 0]
    #  [1 0 0]
    #  [0 1 1]]

if __name__ == '__main__':
    classification_report_demo()
    # accuracy_score_demo()
    pass
37 |
--------------------------------------------------------------------------------
/python_utils/thread_process/thread_timer.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 | """
3 | python timer usage cited from:
4 | http://www.bogotobogo.com/python/Multithread/python_multithreading_subclassing_Timer_Object.php
5 | https://stackoverflow.com/questions/16578652/threading-timer
6 |
7 | repeat timer
8 | https://stackoverflow.com/questions/12435211/python-threading-timer-repeat-function-every-n-seconds
9 | """
10 |
11 | import time
12 | import threading
13 |
14 |
def hello():
    """Print a fixed greeting; used below as a Timer callback.

    Fix: print() call form, valid on both Python 2 and 3 (the original
    used the Python-2-only print statement).
    """
    print('hello world')
17 |
18 |
def hello_2(name):
    """Print a greeting for *name*; Timer callback taking one argument.

    Fix: print() call form, valid on both Python 2 and 3.
    """
    print('hello %s \n' % name)
21 |
22 |
def test_timer_no_para():
    """Start a one-shot timer that runs hello() after 5 seconds.

    Improvement: the Timer object is now returned so the caller can
    cancel() it; previously there was no way to stop the pending call.
    The callback runs exactly once and then the timer thread exits.
    """
    t = threading.Timer(5, hello)
    t.start()
    return t
30 |
31 |
def test_timer_with_para():
    """Start a one-shot timer that runs hello_2(name) after 5 seconds.

    Improvement: the Timer object is now returned so the caller can
    cancel() it. Timer passes the argument list through to hello_2.
    """
    name = 'bear fish'
    t = threading.Timer(5, hello_2, [name])
    t.start()
    return t
40 |
if __name__ == '__main__':
    # Both timers fire roughly 5 seconds after start; the process stays
    # alive until the (non-daemon) timer threads have run.
    test_timer_no_para()
    test_timer_with_para()
    pass
45 |
--------------------------------------------------------------------------------
/python_utils/machine_learn/decision_tree/create_data.py:
--------------------------------------------------------------------------------
1 | # coding:utf-8
2 |
3 | import numpy as np
4 |
5 |
def get_loan_data_lh():
    """Return the loan-approval sample set from Li Hang's book.

    Feature columns:
      age (0=young, 1=middle-aged, 2=old),
      has work (0=has, 1=has not),
      has house (0=has, 1=has not),
      loan level (0=just so so, 1=good, 2=very good)

    :return: (x, y) where x is a (15, 4) int array of samples and y
             is the 0/1 label array.
    """
    age = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2]
    work = [0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0]
    house = [0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0]
    level = [0, 1, 1, 0, 0, 0, 1, 1, 2, 2, 2, 1, 1, 2, 0]

    # stack the per-feature lists as columns -> one row per sample
    x = np.column_stack((age, work, house, level))
    y = np.array([0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0])
    return x, y
25 |
26 |
def get_data():
    """Return a tiny weather-style table.

    :return: (x, y) where x is a list of rows — four feature values
             followed by a 'Y'/'N' label in the last position — and y
             is the list of the four feature-column names.
    """
    rows = [
        [0, 0, 0, 0, 'N'],
        [0, 0, 0, 1, 'N'],
        [1, 0, 0, 0, 'Y'],
        [2, 1, 0, 0, 'Y'],
        [2, 2, 1, 0, 'Y'],
        [2, 2, 1, 1, 'N'],
        [1, 2, 1, 1, 'Y'],
    ]
    feature_names = ['outlook', 'temperature', 'humidity', 'windy']
    return rows, feature_names
38 |
if __name__ == '__main__':
    # Data-provider module; nothing to run directly.
    pass
42 |
--------------------------------------------------------------------------------
/python_utils/thread_process/multitread_profile.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | """
3 | 计算型的任务,使用多线程GIL就会让多线程变慢。我们举个计算斐波那契数列的例子
4 | http://www.dongwm.com/archives/%E4%BD%BF%E7%94%A8Python%E8%BF%9B%E8%A1%8C%E5%B9%B6%E5%8F%91%E7%BC%96%E7%A8%8B-%E7%BA%BF%E7%A8%8B%E7%AF%87/
5 |
6 | """
7 |
8 | import time
9 | import threading
10 |
11 |
def profile(func):
    """Decorator that prints the wall-clock time each call took.

    Fixes over the original: preserves the wrapped function's metadata
    via functools.wraps, propagates the wrapped function's return value
    (the original always returned None), and uses the print() form that
    is valid on both Python 2 and 3.
    """
    from functools import wraps

    @wraps(func)
    def wrapper(*args, **kwargs):
        start = time.time()
        result = func(*args, **kwargs)
        print('COST: {}'.format(time.time() - start))
        return result
    return wrapper
19 |
20 |
def fib(n):
    """Return the n-th Fibonacci number (fib(1) == fib(2) == 1).

    Deliberately naive exponential recursion: the surrounding
    benchmarks use it as a CPU-bound workload.
    """
    return 1 if n <= 2 else fib(n - 1) + fib(n - 2)
25 |
26 |
@profile
def nothread():
    # Sequential baseline: two CPU-bound fib(35) calls on one thread.
    fib(35)
    fib(35)
31 |
32 |
@profile
def hasthread():
    # Same work split across two threads; the GIL serializes the
    # CPU-bound bytecode, so this ends up slower than the sequential
    # version (see the recorded COST values in __main__).
    for i in range(2):
        t = threading.Thread(target=fib, args=(35,))
        t.start()
    main_thread = threading.currentThread()
    # join every thread except the main one
    for t in threading.enumerate():
        if t is main_thread:
            continue
        t.join()
43 |
if __name__ == '__main__':
    # Both runs take tens of seconds; the threaded run is slower due to
    # GIL contention on the CPU-bound workload.
    nothread()
    # COST: 16.8039999008
    hasthread()
    # COST: 42.8039999008
49 |
--------------------------------------------------------------------------------
/python_utils/http_basic/flask_web/flask_content_type.py:
--------------------------------------------------------------------------------
1 | # coding:utf-8
2 |
3 | """
4 | cited from http://blog.luisrei.com/articles/flaskrest.html
5 | test different http request head Content-Type
6 | """
7 |
8 | from flask import Flask
9 | from flask import request
10 | import json
11 |
app = Flask(__name__)


@app.route('/message', methods=['POST'])
def api_msg():
    """Dispatch on the request's Content-Type header.

    Echoes the body for text/plain and application/json, saves the body
    to disk for application/octet-stream, acknowledges multipart posts,
    and reports an unsupported type otherwise.

    NOTE(review): under Python 3 request.data is bytes, so the str
    concatenations below may need a .decode() — confirm target version.
    """
    content_type = request.headers['Content-Type']
    if content_type == 'text/plain':
        return "Text Message: " + request.data

    elif content_type == 'application/json':
        return "Json Message: " + json.dumps(request.json)

    elif content_type == 'application/octet-stream':
        print(len(request.data))
        with open('./file.name', 'wb') as f:
            f.write(request.data)

        return "binary file written"

    elif content_type == 'multipart/form-data':
        # Bug fix: this branch previously printed and fell through with
        # no return value, which makes Flask raise a 500.
        print(request.args)
        return "multipart form received"

    return "415 unsupported media type"
35 |
36 |
if __name__ == '__main__':
    # Bind on all interfaces; enable debug=True for local development.
    # app.run(debug=True)
    app.run(host='0.0.0.0')
    pass
41 |
--------------------------------------------------------------------------------
/python_utils/numpy_operate/array_multiply.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 |
3 | """
4 | This file is some demo about numpy array multiply operator
5 | """
6 | import numpy as np
7 |
8 |
def one_dim_arr_multiply():
    """Show the difference between * and np.dot for 1-D arrays.

    * is elementwise; np.dot of two 1-D arrays is the vector inner
    product (transpose is a no-op on 1-D arrays, shown for comparison).
    Fix: print() call form, valid on Python 3 as well.
    """
    arr1 = np.array([1, 2])
    arr2 = np.array([3, 4])
    print(arr1 * arr2)  # -> [3 8]
    # for a 1-D array np.dot gives the inner product of the vectors
    print(np.dot(arr1, arr2.transpose()))  # 11
    print(arr1, arr2.transpose())
    print(np.dot(arr1, arr2))  # 11
22 |
23 |
def mul_dim_arr_multiply():
    """Show the difference between * and np.dot for 2-D arrays.

    * stays elementwise; np.dot is matrix multiplication, so
    (2,1) dot (2,1) is shape-incompatible and raises ValueError —
    demonstrated inside try/except so the demo runs to completion
    (the original crashed on the last statement).
    """
    arr1 = np.array([[1], [2]])
    arr2 = np.array([[3], [4]])
    print(arr1 * arr2)
    # >> [[3]
    #     [8]]
    print(np.dot(arr1, arr2.transpose()))
    # >>>[[3 4]
    #     [6 8]]
    try:
        print(np.dot(arr1, arr2))
    except ValueError as e:
        # shapes (2,1) and (2,1) not aligned: 1 (dim 1) != 2 (dim 0)
        print('expected failure: %s' % e)
39 |
if __name__ == '__main__':
    # Swap the comments to run the 1-D demo instead.
    # one_dim_arr_multiply()
    mul_dim_arr_multiply()
43 |
--------------------------------------------------------------------------------
/python_utils/sk_sc_pd_operator/pd_feature2value.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 |
3 | import numpy as np
4 | import pandas as pd
5 | import matplotlib.pyplot as plt
6 |
7 |
def factorize_1():
    """Map string columns to integer codes with pd.factorize.

    Fix: print() call form, valid on Python 3 as well.
    """
    df = pd.DataFrame({"A": list('cbaa'),
                       "B": list('zyxz')})

    print(df)
    #    A  B
    # 0  c  z
    # 1  b  y
    # 2  a  x
    # 3  a  z

    # sort=True: codes follow the sorted order of the unique values
    print(df.apply(lambda col: pd.factorize(col, sort=True)[0]))
    #    A  B
    # 0  2  2
    # 1  1  1
    # 2  0  0
    # 3  0  2

    # default: codes follow first-appearance order
    print(df.apply(lambda col: pd.factorize(col)[0]))
    #    A  B
    # 0  0  0
    # 1  1  1
    # 2  2  2
    # 3  2  0
32 |
33 |
def count_1():
    """Count value frequencies three ways: groupby+count, value_counts,
    and groupby+transform (which broadcasts the count to every row).

    Fix: print() call form, valid on Python 3 as well.
    """
    df = pd.DataFrame({'a': list('absba')})
    print(df.groupby('a')['a'].count())
    # a
    # a    2
    # b    2
    # s    1

    print(df['a'].value_counts())
    # b    2
    # a    2
    # s    1

    # transform keeps the original row order/length
    df['freq'] = df.groupby('a')['a'].transform('count')
    print(df)
    #    a  freq
    # 0  a     2
    # 1  b     2
    # 2  s     1
    # 3  b     2
    # 4  a     2
54 |
if __name__ == '__main__':
    # Swap the comments to run the factorize demo instead.
    count_1()
    # factorize_1()
    pass
59 |
--------------------------------------------------------------------------------
/python_utils/http_basic/http_realize/http_realize_1/http_server_1.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 |
3 | import socket
4 |
5 |
6 | RESPONSE = b"""\
7 | HTTP/1.1 200 OK
8 | Content-type: text/html
9 | Content-length: 15
10 |
11 | Hello!
""".replace(b"\n", b"\r\n")
12 |
13 |
def http_server_1():
    """Minimal blocking HTTP server: accept, read one request, answer.

    Binds to an ephemeral port (port 0) on localhost and serves the
    canned RESPONSE forever, one client at a time.

    Fixes: `with client_sock` replaced by try/finally — sockets are not
    context managers on Python 2, which this file's print style targets —
    and print() call form used throughout.
    """
    host, port = "127.0.0.1", 0

    # By default, socket.socket creates TCP sockets.
    server_sock = socket.socket()
    # Reuse sockets left in TIME_WAIT so quick restarts don't fail.
    server_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)

    # This tells the socket what address to bind to.
    server_sock.bind((host, port))

    server_sock.listen(0)

    # getsockname() reveals the ephemeral port the kernel picked.
    print("listening on %s:%s" % server_sock.getsockname())

    while True:
        client_sock, client_addr = server_sock.accept()
        print("new connection from %s: %s" % (client_addr, client_sock))
        data = client_sock.recv(1024)
        print("recv :%s" % data)

        try:
            client_sock.sendall(RESPONSE)
        finally:
            client_sock.close()
40 |
41 |
if __name__ == '__main__':
    # Runs forever; interrupt with Ctrl-C.
    http_server_1()
    pass
45 |
--------------------------------------------------------------------------------
/python_utils/numpy_operate/np_distance.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 |
3 | """
4 |
5 | """
6 | import numpy as np
7 |
8 |
def euclidean_distance():
    """Print and return the Euclidean distance between two fixed vectors.

    Bug fixed: the original called np.square(a1, a2), which treats a2
    as the *out* parameter (writing a1**2 into a2) and computed no
    distance at all.
    """
    a1 = np.array([1, 2, 3])
    a2 = np.array([3, 4, 5])
    dist = np.sqrt(np.sum((a1 - a2) ** 2))
    print(dist)  # 3.46410161514
    return dist
13 |
14 |
def np_sum():
    """Show elementwise difference, squaring and summation on arrays.

    Fix: print() call form, valid on Python 3 as well.
    """
    a1 = np.array([1, 2, 3])
    a2 = np.array([3, 4, 5])
    print(a1 - a2)
    # [-2 -2 -2]
    print((a1 - a2) ** 2)
    # [4 4 4]
    print(np.sum(a1 - a2))
    # -6
    print(np.sum((a1 - a2) ** 2))
    # 12
26 |
27 |
def np_sqrt():
    """Show elementwise square root.

    Fix: print() call form, valid on Python 3 as well.
    """
    a1 = np.array([1, 4, 9])
    print(np.sqrt(a1))
    # [ 1.  2.  3.]
32 |
33 |
def euclidean_distance_v2():
    """Euclidean distance via sqrt(sum(squared diffs)); prints and
    returns it (return value added for testability)."""
    a1 = np.array([1, 2, 3])
    a2 = np.array([3, 4, 5])
    dist = np.sqrt(np.sum((a1 - a2) ** 2))
    print(dist)  # 3.46410161514
    return dist
39 |
40 |
def euclidean_distance_v3():
    """Euclidean distance via np.linalg.norm; prints and returns it
    (return value added for testability)."""
    a1 = np.array([1, 2, 3])
    a2 = np.array([3, 4, 5])
    dist = np.linalg.norm(a1 - a2)
    print(dist)  # 3.46410161514
    return dist
46 |
47 |
def eu_distance():
    """Pure-Python Euclidean distance between two fixed lists; prints
    and returns it (return value added for testability)."""
    from math import sqrt
    a1 = [1, 2, 3]
    a2 = [3, 4, 5]
    dist = sqrt(sum((a - b) ** 2 for a, b in zip(a1, a2)))
    print(dist)  # 3.46410161514
    return dist
54 |
if __name__ == '__main__':
    # Uncomment a variant to compare; all compute the same distance.
    euclidean_distance()
    # euclidean_distance_v2()
    # np_sum()
    # euclidean_distance_v3()
    # np_sqrt()
    pass
62 |
--------------------------------------------------------------------------------
/python_utils/thread_process/thread_condition.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 | """
3 | This file is about a sample demo about threading.condition
4 | """
5 |
6 | import threading
7 | import time
8 | import logging
9 | from basic_thread import join_all_others_thread
10 | logging.basicConfig(level=logging.DEBUG,
11 | format='%(asctime)s %(threadName)s %(message)s',
12 | datefmt='%Y-%m-%d %I:%M:%S')
13 |
14 |
def notify_condition(con):
    """Wake every thread currently waiting on *con*.

    Fix: notify_all() — the camelCase notifyAll alias is a deprecated
    synonym. The condition's lock must be held, hence the with-block.
    """
    with con:
        logging.debug('now notify all the condition')
        con.notify_all()
19 |
20 |
def wait_condition(con):
    # Blocks until some other thread notifies on the same condition;
    # the lock is released while waiting and re-acquired on wake-up.
    with con:
        logging.debug('I am waiting for an condition')
        con.wait()
        logging.debug('I get the condition.....')
26 |
27 |
def test_demo():
    # Two waiter threads block on the shared condition; after 5 seconds
    # one notifier thread wakes both, then every non-main thread is
    # joined (join_all_others_thread comes from basic_thread).
    con = threading.Condition()
    t_w1 = threading.Thread(name='t_w1', target=wait_condition, args=(con, ))
    t_w2 = threading.Thread(name='t_w2', target=wait_condition, args=(con, ))
    t_n1 = threading.Thread(name='t_n1', target=notify_condition, args=(con, ))
    t_w1.start()
    t_w2.start()

    logging.debug('now main thread sleeping 5S')
    time.sleep(5)
    t_n1.start()

    join_all_others_thread()
    logging.debug('now all have been done')
42 |
if __name__ == '__main__':
    # Takes ~5 seconds end to end (see the sleep in test_demo).
    test_demo()
    pass
46 |
--------------------------------------------------------------------------------
/python_utils/py_basic/yield_ope.py:
--------------------------------------------------------------------------------
1 | # coding:utf-8
2 |
3 |
def f_135():
    """Generator yielding 1, 3, 5 in order."""
    for value in (1, 3, 5):
        yield value
8 |
9 |
def demo_1():
    """Consume the f_135() generator via a for loop, then manually
    with next().

    Fix: print() call form, valid on Python 3 as well.
    """
    for val in f_135():
        print(val)
    # 1 3 5

    generator = f_135()
    print(next(generator))  # 1
    print(next(generator))  # 3
    print(next(generator))  # 5
23 |
24 |
def fibonacci(n):
    """Yield the first n Fibonacci numbers: 1, 1, 2, 3, 5, ..."""
    cur, pre = 1, 0
    for _ in range(n):
        yield cur
        cur, pre = cur + pre, cur
33 |
34 |
def demo_fib():
    """Drive the fibonacci() generator with a loop and with next().

    Fix: print() call form, valid on Python 3 as well.
    """
    ge_fib = fibonacci(10)
    for i in ge_fib:
        print(i, ", ")
    # 1 , 1 , 2 , 3 , 5 , 8 , 13 , 21 , 34 , 55

    ge_fib = fibonacci(5)
    print(next(ge_fib))  # 1
    print(next(ge_fib))  # 1
46 |
47 |
def read_file(f_path='__init__.py'):
    """Yield the file at *f_path* as successive 60-byte chunks."""
    chunk_size = 60
    with open(f_path, 'rb') as handle:
        # iter() with a b'' sentinel stops exactly at EOF, matching the
        # original read-then-test loop.
        for chunk in iter(lambda: handle.read(chunk_size), b''):
            yield chunk
56 |
57 |
def demo_read_file():
    """Print each 60-byte chunk of the default file.

    Fix: print() call form, valid on Python 3 as well; the stale
    commented-out scrap that followed was dropped.
    """
    for txt in read_file():
        print(txt)
65 |
if __name__ == '__main__':
    # Swap the comments to run the other generator demos.
    # demo_1()
    # demo_fib()
    demo_read_file()
    pass
71 |
--------------------------------------------------------------------------------
/python_utils/netsocket/ip_int.py:
--------------------------------------------------------------------------------
1 | import struct
2 | import socket
3 | import math
4 |
5 |
def ip2int(addr):
    """Convert a dotted-quad IPv4 string to its 32-bit integer value."""
    packed = socket.inet_aton(addr)
    (value,) = struct.unpack("!I", packed)
    return value
8 |
9 |
def int2ip(addr):
    """Convert a 32-bit integer to its dotted-quad IPv4 string."""
    packed = struct.pack("!I", addr)
    return socket.inet_ntoa(packed)
12 |
13 |
def str_ip2_int(s_ip='192.168.1.100'):
    """Convert a dotted-quad IPv4 string to an int with shifts and ORs.

    Fix: print() call form, valid on Python 3 as well.
    """
    lst = [int(item) for item in s_ip.split('.')]
    print(lst)
    # [192, 168, 1, 100]

    int_ip = lst[3] | lst[2] << 8 | lst[1] << 16 | lst[0] << 24
    return int_ip  # 3232235876
21 |
22 |
def str_ip2_int_v2(s_ip='192.168.1.100'):
    """Convert a dotted-quad IPv4 string to an int with powers of 256.

    Fixes: the original built the octet list twice (the comprehension
    result was immediately overwritten), and the overwriting bare
    map() call returns a non-indexable iterator on Python 3.
    """
    lst = [int(item) for item in s_ip.split('.')]
    print(lst)
    # [192, 168, 1, 100]

    int_ip = (lst[3] + lst[2] * pow(2, 8) +
              lst[1] * pow(2, 16) + lst[0] * pow(2, 24))
    return int_ip  # 3232235876
31 |
32 |
def int_ip2str(int_ip=3232235876):
    """Convert a 32-bit integer to a dotted-quad IPv4 string.

    Fix: range() instead of the Python-2-only xrange().
    """
    lst = []
    for i in range(4):
        shift_n = 8 * i
        # prepend so the most-significant octet ends up first
        lst.insert(0, str((int_ip >> shift_n) & 0xff))

    return ".".join(lst)
40 |
41 |
if __name__ == '__main__':
    # Round-trip demo: string -> int -> string, plus the manual variants.
    str_ip = '192.168.1.100'
    int_ip = ip2int(str_ip)
    print "%s -> int is: %s" % (str_ip, int_ip)
    # 192.168.1.100 -> int is: 3232235876

    str_ip = int2ip(int_ip)
    print "%s -> str is: %s" % (int_ip, str_ip)
    # 3232235876 -> str is: 192.168.1.100

    print str_ip2_int_v2()

    print int_ip2str(int_ip)
55 |
56 |
--------------------------------------------------------------------------------
/python_utils/http_basic/wsgi_demo/wsgi_demo.py:
--------------------------------------------------------------------------------
1 | # coding:utf-8
2 |
3 | """
4 | C:\Users\DW>curl -i -X POST 127.0.0.1:8080
5 | HTTP/1.0 200 OK
6 | Date: Fri, 01 Dec 2017 12:06:27 GMT
7 | Server: WSGIServer/0.1 Python/2.7.12
8 | Content-Type: text/html
9 | Content-Length: 257
10 |
11 | Hello User!
12 |
13 | .......
14 |
15 |
16 | """
17 |
18 | from wsgiref.simple_server import make_server
19 |
20 |
def application(environ, start_response):
    """Simplest possible WSGI app: a fixed plain-text body."""
    headers = [("Content-type", "text/plain")]
    start_response("200 OK", headers)
    body = "Hello my friend!".encode("utf-8")
    return [body]
24 |
25 |
26 | form = """ Hello User!
27 |
28 |
33 |
34 |
35 | """
36 |
37 |
def app_post(environ, start_response):
    """WSGI app that serves the HTML form on POST and text on GET.

    Fixes: the form string is encoded to bytes (WSGI response bodies
    must be bytes on Python 3), non-GET/POST verbs get an explicit
    empty body instead of a None return, and print() call form is used.
    """
    start_response('200 OK', [('Content-Type', 'text/html')])
    print(environ)
    method = environ['REQUEST_METHOD']
    if method == 'POST':
        return [form.encode("utf-8")]

    elif method == 'GET':
        return ["get request".encode("utf-8")]

    # any other verb: empty body rather than None
    return []
47 |
if __name__ == '__main__':
    # Serve app_post on localhost:8080 until interrupted.
    # server = make_server('localhost', 8080, application)
    server = make_server('localhost', 8080, app_post)
    server.serve_forever()
    pass
53 |
--------------------------------------------------------------------------------
/python_utils/py_basic/random_operator.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 | import random
3 | import string
4 |
5 |
def generate_random_num_str(length):
    """Return a random string of *length* ASCII letters.

    Fix: string.ascii_letters exists on both Python 2 and 3;
    string.letters is Python-2 only.
    """
    return ''.join(random.choice(string.ascii_letters) for i in range(length))
8 |
9 |
def get_random_int():
    """Print a random int in [0, 10] and one in [180, 200].

    randint is inclusive on both ends. Fix: print() call form.
    """
    n = random.randint(0, 10)
    print(n)

    print(random.randint(180, 200))
16 |
17 |
def rand_range():
    """Print samples from randrange (stop-exclusive, optional step).

    Fix: print() call form, valid on Python 3 as well.
    """
    print(random.randrange(1, 100))
    # like 23
    # with step 10: a multiple of 10 below 100
    print(random.randrange(0, 100, 10))
    # 20
    print(random.randrange(0, 100, 10))
    # 90
25 |
26 |
def random_seed():
    """Show that re-seeding with the same value repeats random().

    Fix: single-argument print() with %-formatting, so the output is
    identical under Python 2 and 3.
    """
    sd = 3
    random.seed(sd)
    print("Random number with seed 10 : %s" % random.random())

    # Same seed -> same number (re-seed with sd before every call)
    random.seed(sd)
    print("Random number with seed 10 : %s" % random.random())

    random.seed(sd)
    print("Random number with seed 10 : %s" % random.random())
39 |
if __name__ == '__main__':
    # Scratchpad: uncomment a demo to run it; sample outputs kept below.
    # random_seed()
    # rand_range()
    # get_random_int()
    # print random.choice(string.letters)
    # f
    # print generate_random_num_str(5)
    # bjSQU
    # print string.letters
    # abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ
    import time
    t = int(time.time())
    print random.randint(1000000, 100000000)

    print random.choice([1, 3, 5, 7])

    pass
57 |
--------------------------------------------------------------------------------
/python_utils/py_basic/decorator_set.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 |
3 | """
4 | cited from stack overflow
5 | thread safe set, by decorator
6 | """
7 | from threading import Lock
8 |
9 |
def locked_method(method):
    """Method decorator. Requires a lock object at self._lock.

    The wrapped call runs with self._lock held. Fix: functools.wraps
    preserves the decorated method's name and docstring.
    """
    from functools import wraps

    @wraps(method)
    def newmethod(self, *args, **kwargs):
        with self._lock:
            return method(self, *args, **kwargs)
    return newmethod
16 |
17 |
class DecoratorLockedSet(set):
    """A set whose add/remove are serialized via the locked_method
    decorator (requires self._lock)."""

    def __init__(self, *args, **kwargs):
        self._lock = Lock()
        super(DecoratorLockedSet, self).__init__(*args, **kwargs)

    @locked_method
    def add(self, *args, **kwargs):
        # Bug fix: the original passed the tuple `args` and dict
        # `kwargs` as two positional arguments to set.add, which raises
        # TypeError; unpack them instead.
        return super(DecoratorLockedSet, self).add(*args, **kwargs)

    @locked_method
    def remove(self, *args, **kwargs):
        # Same unpacking bug fix as add().
        return super(DecoratorLockedSet, self).remove(*args, **kwargs)
30 |
31 |
class LockedSet(set):
    """A set where add(), remove(), and 'in' operator are thread-safe."""

    def __init__(self, *args, **kwargs):
        self._lock = Lock()
        super(LockedSet, self).__init__(*args, **kwargs)

    def add(self, elem):
        with self._lock:
            super(LockedSet, self).add(elem)

    def remove(self, elem):
        with self._lock:
            super(LockedSet, self).remove(elem)

    def __contains__(self, elem):
        with self._lock:
            # Bug fix: the original dropped the result, so `x in s`
            # always evaluated to False.
            return super(LockedSet, self).__contains__(elem)
50 |
51 |
--------------------------------------------------------------------------------
/python_utils/sk_sc_pd_operator/split_train_test_data.py:
--------------------------------------------------------------------------------
1 | # coding:utf-8
2 |
3 | import pandas as pd
4 | import numpy as np
5 |
6 |
def split_1():
    """80/20 train/test split via DataFrame.sample + drop.

    Fix: print() call form, valid on Python 3 as well.
    """
    df = pd.DataFrame({'lst1': range(5),
                       'lst2': range(5)[::-1]},
                      columns=['lst1', 'lst2'])

    print(df)
    #    lst1  lst2
    # 0     0     4
    # 1     1     3
    # 2     2     2
    # 3     3     1
    # 4     4     0

    # sample without replacement; fixed random_state -> reproducible
    train = df.sample(frac=0.8, random_state=200)
    print(train)
    #    lst1  lst2
    # 3     3     1
    # 4     4     0
    # 0     0     4
    # 1     1     3

    # the rows not drawn into train become the test set
    test = df.drop(train.index)
    print(test)
    #    lst1  lst2
    # 2     2     2
32 |
33 |
def split_2():
    """80/20 split via sklearn's train_test_split (random each run).

    Fix: print() call form, valid on Python 3 as well.
    """
    from sklearn.model_selection import train_test_split

    df = pd.DataFrame({'lst1': range(5),
                       'lst2': range(5)[::-1]},
                      columns=['lst1', 'lst2'])

    print(df)
    #    lst1  lst2
    # 0     0     4
    # 1     1     3
    # 2     2     2
    # 3     3     1
    # 4     4     0

    train, test = train_test_split(df, test_size=0.2)

    print(train)
    # four random rows

    print(test)
    # the remaining row
61 |
if __name__ == '__main__':
    # Swap the comments to run the pandas-only split.
    split_2()
    # split_1()
    pass
66 |
--------------------------------------------------------------------------------
/python_utils/numpy_operate/arr_equal_close.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 |
3 | import numpy as np
4 |
5 |
def arr_equal():
    """np.array_equal: same shape AND same elements.

    Fix: print() call form, valid on Python 3 as well.
    """
    ar1 = np.array([[1, 2], [3, 4]])
    ar2 = np.array([[1, 2], [3, 4]])
    ar3 = np.array([[1, 2], [3, 5]])

    print(np.array_equal(ar1, ar2))
    # True
    print(np.array_equal(ar1, ar3))
    # False
15 |
16 |
def arr_equiv():
    """np.array_equiv: equal after broadcasting one to the other.

    Fix: print() call form, valid on Python 3 as well.
    """
    ar1 = np.array([[1, 2], [3, 4]])
    ar2 = np.array([[1, 2]])
    ar3 = np.array([[1, 2], [1, 2]])
    ar4 = np.array([1, 2])
    print(np.array_equiv(ar1, ar2))
    # False
    print(np.array_equiv(ar1, ar4))
    # False
    # ar2 broadcasts to both rows of ar3
    print(np.array_equiv(ar2, ar3))
    # True
28 |
29 |
def arr_close():
    """np.isclose / np.allclose: approximate elementwise comparison.

    Fix: print() call form, valid on Python 3 as well.
    """
    ar1 = np.array([[1, 2], [3, 4]])
    ar2 = np.array([[1.1, 2.1], [3.1, 4.1]])
    ar3 = np.array([[1.00001, 2.00001], [3.00001, 4.00001]])
    ar4 = np.array([[1.0001, 2.0001], [3.0001, 4.0001]])

    print(np.isclose(ar1, ar2))
    # [[False False]
    #  [False False]]
    print(np.isclose(ar1, ar3))
    # [[ True  True]
    #  [ True  True]]
    print(np.isclose(ar1, ar4))
    # [[False False]
    #  [False False]]
    # loosening the absolute tolerance flips the previous result
    print(np.isclose(ar1, ar4, atol=1.e-4))
    # [[ True  True]
    #  [ True  True]]

    # allclose folds the elementwise result with all()
    print(np.allclose([1e10, 1e-7], [1.00001e10, 1e-8]))
    # False
    print(np.allclose([1e10, 1e-8], [1.00001e10, 1e-9]))
    # True
53 |
if __name__ == '__main__':
    # Swap the comments to run the exact-equality demos.
    arr_close()
    # arr_equal()
    # arr_equiv()
    pass
59 |
--------------------------------------------------------------------------------
/python_utils/machine_learn/logistic_regression/lr_sklearn_v1.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 |
3 | # Code source: Gaël Varoquaux
4 | # Modified for documentation by Jaques Grobler
5 | # License: BSD 3 clause
6 | # http://scikit-learn.org/stable/auto_examples/linear_model/plot_iris_logistic.html
7 |
8 | import numpy as np
9 | import matplotlib.pyplot as plt
10 | from sklearn import linear_model, datasets
11 |
# import some data to play with
iris = datasets.load_iris()
X = iris.data[:, :2]  # we only take the first two features.
Y = iris.target

h = .02  # step size in the mesh

# Large C means weak L2 regularization: close to plain logistic regression.
logreg = linear_model.LogisticRegression(C=1e5)

# we create an instance of Neighbours Classifier and fit the data.
logreg.fit(X, Y)

# Plot the decision boundary. For that, we will assign a color to each
# point in the mesh [x_min, x_max]x[y_min, y_max].
x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
# predict every grid point, one row of (x, y) pairs per point
Z = logreg.predict(np.c_[xx.ravel(), yy.ravel()])

# Put the result into a color plot
Z = Z.reshape(xx.shape)
plt.figure(1, figsize=(4, 3))
plt.pcolormesh(xx, yy, Z, cmap=plt.cm.Paired)

# Plot also the training points
plt.scatter(X[:, 0], X[:, 1], c=Y, edgecolors='k', cmap=plt.cm.Paired)
plt.xlabel('Sepal length')
plt.ylabel('Sepal width')

plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max())
plt.xticks(())
plt.yticks(())

plt.show()
47 |
--------------------------------------------------------------------------------
/python_utils/py_basic/dw_img_from_google.py:
--------------------------------------------------------------------------------
1 | from google_images_download import google_images_download
2 |
chrome_driver_path = "C:/Program Files (x86)/Google/Chrome/Application/chromedriver.exe"
# NOTE: only the last assignment takes effect; the earlier paths are
# kept as a history of previous download targets.
out_put_path = "E:/face_rec/yy_face_demand/cartoon_sample/"
out_put_path = "E:/face_rec/short_vedio_famous_people/people_lst/"
out_put_path = "E:/people_detection/test_datasets/"
7 |
8 |
def dw(s_keyword):
    """
    :param s_keyword: like "pet cat images, pet dog images"
    :return: None
    """
    # class instantiation
    downloader = google_images_download.googleimagesdownload()

    # download configuration for the given keyword(s)
    arguments = {
        "keywords": s_keyword,
        "limit": 200,
        "print_urls": True,
        "output_directory": out_put_path,
        "chromedriver": chrome_driver_path,
    }

    # run the download and show where the files landed
    paths = downloader.download(arguments)
    print(paths)
28 |
29 |
def do_dw():
    """Download images for the first hard-coded keyword."""
    lst_keywords = ["pedestrian images"]
    dw(lst_keywords[0])
35 |
36 |
def dw_famous():
    """Download images for every name listed in famous_list.txt.

    Fix: the file handle is now closed via a with-block; the original
    `open(...).read()` leaked the handle.
    """
    f_path = 'E:/face_rec/short_vedio_famous_people/famous_list.txt'
    with open(f_path) as f:
        lst_famous = f.read().split('\n')

    # one comma-separated keyword string for the downloader
    s_key_word = ",".join(lst_famous)
    dw(s_key_word)
46 |
47 |
if __name__ == '__main__':
    # Swap the comments to run the keyword-list download instead.
    dw_famous()
    # do_dw()
    pass
52 |
--------------------------------------------------------------------------------
/python_utils/http_basic/socket_basic/basic_client.py:
--------------------------------------------------------------------------------
1 | # coding:utf-8
2 | import socket
3 | import time
4 |
5 |
def basic_block():
    """Open a blocking TCP connection to baidu and report the peer."""
    sock = socket.socket()
    sock.connect(('www.baidu.com', 80))
    print("We are connected to %s:%d" % sock.getpeername())
    # We are connected to 61.135.169.121:80
11 |
12 |
def basic_non_block():
    """Connect without blocking and poll until the socket is ready.

    Fixes: the host string no longer contains the 'http://' scheme
    (socket.connect needs a bare hostname); the bare except is narrowed
    to socket.error so real bugs are not swallowed; the unreachable
    while-else clause was removed (the loop only exits via break).
    """
    s = socket.socket()
    s.setblocking(0)

    try:
        s.connect(('vis-www.cs.umass.edu', 80))
    except socket.error as e:
        # a non-blocking connect normally raises EINPROGRESS/EWOULDBLOCK
        print(str(e))
    i = 0
    while True:
        try:
            print("We are connected to %s:%d" % s.getpeername())
            break
        except socket.error:
            print("Let's do some math while waiting: %d" % i)
            i += 1
31 |
32 |
def basic_connect_rcv():
    """
    connect to server and receive msg from server

    Fixes: range() instead of xrange(), print() call form, and the
    payload is sent as bytes so send() also works on Python 3.
    :return:
    """
    s = socket.socket()
    s.connect(('127.0.0.1', 8888))
    # s.connect(('221.228.106.244', 8888))
    print("We are connected to %s:%d" % s.getpeername())
    # s_rcv = str(s.recv(1024 * 1024))

    buf = b'123456789 -> '
    # time.sleep(68)
    for i in range(5):
        n = s.send(buf)
        print("now send %s" % n)
        time.sleep(2)
51 |
if __name__ == '__main__':
    # Requires a server listening on 127.0.0.1:8888.
    # basic_block()
    # basic_non_block()
    basic_connect_rcv()
    pass
--------------------------------------------------------------------------------
/python_utils/sk_sc_pd_operator/pd_index.py:
--------------------------------------------------------------------------------
1 | # coding:utf-8
2 |
3 | import pandas as pd
4 |
5 |
def test_index_1():
    """Index basics: positional access, slicing, shape metadata.

    Fix: print() call form, valid on Python 3 as well.
    """
    idx = pd.Index([2, 3, 7, 5, 3])
    print(idx)
    # Int64Index([2, 3, 7, 5, 3], dtype='int64')
    print(idx[1])
    # 3
    print(idx[::2])
    # Int64Index([2, 7, 3], dtype='int64')

    print(idx.size, idx.shape, idx.ndim, idx.dtype)
    # 5 (5L,) 1 int64
17 |
18 |
def test_index_2():
    """Set operations on two Indexes.

    Fix: use the explicit intersection/union/symmetric_difference
    methods — the &, | and ^ operators stopped meaning set operations
    for numeric Indexes in modern pandas (they are elementwise now).
    """
    idx_1 = pd.Index([1, 3, 5, 7, 9])
    idx_2 = pd.Index([2, 3, 5, 7, 11])

    print(idx_1.intersection(idx_2))
    # Int64Index([3, 5, 7], dtype='int64')
    print(idx_1.union(idx_2))
    # Int64Index([1, 2, 3, 5, 7, 9, 11], dtype='int64')
    print(idx_1.symmetric_difference(idx_2))
    # Int64Index([1, 2, 9, 11], dtype='int64')
29 |
30 |
def rename_columns():
    """Three ways to rename DataFrame columns.

    Fix: print() call form, valid on Python 3 as well.
    """
    df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]})
    print(df)
    #    A  B  C
    # 0  1  4  7
    # 1  2  5  8
    # 2  3  6  9

    # rename one column in place; with inplace=True the call returns
    # None, so do NOT assign the result back (see the commented bug)
    # df = df.rename(columns={"A": "a"}, inplace=True)
    df.rename(columns={"B": "b"}, inplace=True)
    print(df)
    #    A  b  C
    # 0  1  4  7
    # 1  2  5  8
    # 2  3  6  9

    # replace the whole header row at once
    df.columns = list('abc')

    print(df)
    #    a  b  c
    # 0  1  4  7
    # 1  2  5  8
    # 2  3  6  9

    # NOTE(review): writing into .values mutates the Index's backing
    # array directly — works here, but it is not a supported pandas API.
    df.columns.values[2] = 'C'
    print(df)
    #    a  b  C
    # 0  1  4  7
    # 1  2  5  8
    # 2  3  6  9
61 |
if __name__ == '__main__':
    # Swap the comments to run the Index demos.
    rename_columns()
    # test_index_2()
    # test_index_1()
    pass
67 |
--------------------------------------------------------------------------------
/python_utils/machine_learn/decision_tree/dtree_scratch.py:
--------------------------------------------------------------------------------
1 | # coding:utf-8
2 |
3 | import numpy as np
4 |
5 |
class DTree(object):
    """Scaffolding for an ID3-style decision tree (entropy helpers only)."""

    def __init__(self):
        pass

    def _cal_entropy(self, arr_prob):
        """
        for example arr_prob like [0.5, 0.5]
        return -1 * 0.5 *log0.5 + -1 * 0.5 * log0.5 = 1
        :param arr_prob: one dimension probability array
        :return: entropy
        """
        # -1 * sum(Pi * logPi); a zero probability would give nan via
        # log2(0), so callers must pass strictly positive probabilities.
        return np.sum(-1 * np.log2(arr_prob) * arr_prob)

    def _cal_conditional_entropy(self, X, Y):
        """
        calculate conditional entropy H(D|A)
        :return: (not implemented yet)
        """
        pass

    def _cal_class_entropy(self, y):
        """Compute the entropy of the label array y.

        Fixes: print() call form, and the entropy is now returned
        instead of only printed.

        :param y: 1-D array/list of class labels
        :return: the dataset entropy as a float
        """
        num = len(y)
        print(num)  # 15 for the Li Hang loan data
        unique_class, counter = np.unique(y, return_counts=True)
        print(unique_class, counter)
        # [0 1] [6 9]
        # per-class probability (float division on purpose)
        class_prob = [c * 1.0 / num for c in counter]
        print(class_prob)
        # [0.4, 0.6]
        entropy = self._cal_entropy(class_prob)
        print(entropy)
        # 0.970950594455
        return entropy
45 |
46 |
def test_cal_class_entropy():
    """Smoke-check _cal_class_entropy on the Li Hang loan labels."""
    from create_data import get_loan_data_lh
    _, labels = get_loan_data_lh()
    DTree()._cal_class_entropy(labels)
52 |
if __name__ == '__main__':
    # Compute the class entropy of the sample loan data.
    test_cal_class_entropy()
    # dt = DTree()

    pass
58 |
--------------------------------------------------------------------------------
/python_utils/machine_learn/knearest/knn_scratch.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 | """
3 | implement knn from scratch
4 | """
5 | from collections import Counter
6 | import numpy as np
7 |
8 |
class KnnScratch(object):
    """k-nearest-neighbours classifier implemented from scratch."""

    def fit(self, x_train, y_train):
        """Memorize the training samples (lazy learner: no real fitting).

        :param x_train: 2-D array of training samples
        :param y_train: sequence of labels, one per training row
        """
        self.x_train = x_train
        self.y_train = y_train

    def predict_once(self, x_test, k):
        """Predict the label for a single sample.

        Fix: range() instead of the Python-2-only xrange().

        :param x_test: one sample (1-D array)
        :param k: number of nearest neighbours that vote
        :return: the majority label among the k nearest neighbours
        """
        lst_distance = []
        lst_predict = []

        for i in range(len(self.x_train)):
            # euclidean distance to each training sample
            distance = np.linalg.norm(x_test - self.x_train[i, :])
            lst_distance.append([distance, i])

        # ascending by distance; index travels along as the tie-breaker
        lst_distance = sorted(lst_distance)

        for i in range(k):
            idx = lst_distance[i][1]
            lst_predict.append(self.y_train[idx])

        return Counter(lst_predict).most_common(1)[0][0]

    def predict(self, x_test, k):
        """Predict a label for each row of x_test; returns a list."""
        lst_predict = []
        for i in range(len(x_test)):
            lst_predict.append(self.predict_once(x_test[i, :], k))

        return lst_predict
39 |
if __name__ == '__main__':
    # Tiny smoke test: two well-separated clusters, k=2.
    x_train = np.array([[1, 1, 1], [2, 2, 2], [10, 10, 10], [13, 13, 13]])
    y_train = ['aa', 'aa', 'bb', 'bb']
    x_test = np.array([[3, 2, 4], [9, 13, 11]])

    k = 2
    knn = KnnScratch()
    knn.fit(x_train, y_train)

    print knn.predict_once(x_test[0], k)
    # aa

    print knn.predict(x_test, k)
    # ['aa', 'bb']

    pass
56 |
--------------------------------------------------------------------------------
/python_utils/thread_process/pool_dummy.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 | """
3 | This file is about thread(dummy)/process pool
4 | """
5 | from multiprocessing import Pool as ProcessPool
6 | from multiprocessing.dummy import Pool as ThreadPool
7 | import logging
8 | from time import sleep, time
9 | from random import randrange
10 |
11 |
12 | # logging.basicConfig(level=logging.DEBUG,
13 | # format='%(levelname)s %(asctime)s %(threadName)s %(message)s',
14 | # datefmt='%Y-%m-%d %I:%M:%S')
15 |
16 | logging.basicConfig(level=logging.DEBUG,
17 | format='%(levelname)s %(asctime)s %(processName)s %(message)s',
18 | datefmt='%Y-%m-%d %I:%M:%S')
19 |
20 |
def handler(sec):
    """Pool task: log the intent, then sleep for `sec` seconds."""
    logging.debug('now I will sleep %s S', sec)
    sleep(sec)
24 |
25 |
def get_pool(b_dummy=True, num=4):
    """
    if b_dummy is True then get ThreadPool, or get process pool
    :param b_dummy: dummy thread Pool or Process pool
    :param num: thread or process num
    :return: pool object
    """
    pool_cls = ThreadPool if b_dummy else ProcessPool
    return pool_cls(num)
39 |
40 |
def test_dummy_thread_pool():
    """Run 10 random 3-10 s sleep tasks through a pool and log wall time.

    NOTE: despite the name, b_dummy=False selects the *process* pool here.
    """
    started = time()
    durations = [randrange(3, 10) for _ in range(10)]
    pool = get_pool(b_dummy=False)

    outcome = pool.map(handler, durations)
    logging.debug(outcome)
    pool.close()
    pool.join()
    logging.debug('time consume %s', time() - started)
52 |
53 |
if __name__ == '__main__':
    # NOTE: takes several seconds — the pooled tasks each sleep 3-10 s.
    test_dummy_thread_pool()
    pass
57 |
--------------------------------------------------------------------------------
/python_utils/machine_learn/Bayes/bayes_sklearn.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 | """
3 | Data set from
4 | https://archive.ics.uci.edu/ml/datasets/Pima+Indians+Diabetes
5 | https://archive.ics.uci.edu/ml/machine-learning-databases/pima-indians-diabetes/pima-indians-diabetes.data
6 |
7 | """
8 | import numpy as np
9 | from sklearn.naive_bayes import GaussianNB, BernoulliNB
10 | import pandas as pd
11 | from sklearn.model_selection import train_test_split, cross_val_score
12 |
13 |
def sk_demo_1():
    """Fit Gaussian naive Bayes on a toy 2-D set and classify one point."""
    features = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
    labels = np.array([1, 1, 1, 2, 2, 2])
    model = GaussianNB()
    model.fit(features, labels)
    sample = np.array([[-0.8, -1]])
    print(model.predict(sample))
    # [1]
    print(model.get_params())
23 |
24 |
25 | def load_diabetes_data():
26 | path = '../dataset/bayes/pima-indians-diabetes.txt'
27 | df = pd.read_csv(path, header=None)
28 | print df.head()
29 | # the below get 9 columns, not 8 why?
30 | # x = np.array(df.ix[:, 0:8])
31 | x = np.array(df.ix[:, 0:7])
32 | print x.shape, x[0]
33 | y = np.array(df.ix[:, 8])
34 | print y.shape, y[0]
35 |
36 | return train_test_split(x, y, test_size=0.33, random_state=40)
37 |
38 |
def sk_nb_diabetes():
    """Train GaussianNB on the diabetes split and report test accuracy.

    The original body created the classifier but never fit it; complete
    the pipeline and return the held-out accuracy.
    :return: accuracy score on the test split
    """
    x_train, x_test, y_train, y_test = load_diabetes_data()
    clf = GaussianNB()
    clf.fit(x_train, y_train)
    accuracy = clf.score(x_test, y_test)
    print(accuracy)
    return accuracy
42 |
43 |
def sk_bernoulli_demo():
    """Fit BernoulliNB on random binary features and predict one sample."""
    x = np.random.randint(2, size=(6, 100))
    y = np.array([1, 2, 3, 4, 4, 5])
    clf = BernoulliNB()
    clf.fit(x, y)
    # predict() expects a 2-D (n_samples, n_features) array; x[2] is 1-D and
    # raises ValueError in scikit-learn, so slice to keep the row 2-D.
    print(clf.predict(x[2:3]))
51 |
if __name__ == '__main__':
    # sk_demo_1()
    # load_diabetes_data()
    # Only the Bernoulli demo runs by default.
    sk_bernoulli_demo()
    pass
57 |
--------------------------------------------------------------------------------
/python_utils/py_basic/calendar_ope.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 | import datetime
3 | import calendar
4 | from dateutil import rrule
5 |
6 |
def get_all_day_v1():
    """Print every day from 20171030 to 20171102 using dateutil.rrule."""
    from datetime import datetime
    start, end = '20171030', '20171102'
    for day in rrule.rrule(rrule.DAILY,
                           dtstart=datetime.strptime(start, '%Y%m%d'),
                           until=datetime.strptime(end, '%Y%m%d')):
        print(day.strftime('%Y%m%d'))

    # 20171030 20171031 20171101 20171102
17 |
18 |
def get_all_day_v2():
    """Print every day from 2017-10-30 to 2017-11-02 via timedelta steps."""
    start = datetime.date(2017, 10, 30)
    end = datetime.date(2017, 11, 2)
    span = end - start
    print(span.days)
    # 3
    # +1 so the end date itself is included
    for offset in range(span.days + 1):
        print((start + datetime.timedelta(days=offset)).strftime('%Y%m%d'))

    # 20171030 20171031 20171101 20171102
30 |
31 |
def tb_partition_sql():
    """
    mysql partition table by day in month
    :return:
    """
    template = """PARTITION p%s VALUES LESS THAN (TO_DAYS('%s')) ENGINE = InnoDB,"""
    first = datetime.date(2018, 12, 1)
    last = datetime.date(2018, 12, 31)
    # +2: one extra day so the last partition's upper bound is 2019-01-01
    days = [first + datetime.timedelta(days=offset)
            for offset in range((last - first).days + 2)]
    for cur, nxt in zip(days, days[1:]):
        print(template % (cur.strftime('%Y%m%d'), nxt.strftime('%Y-%m-%d')))

    # PARTITION p20181201 VALUES LESS THAN (TO_DAYS('2018-12-02')) ENGINE = InnoDB,
    # ...........
    # PARTITION p20181231 VALUES LESS THAN (TO_DAYS('2019-01-01')) ENGINE = InnoDB,
50 |
if __name__ == '__main__':
    # get_all_day_v1()
    # get_all_day_v2()
    # Print the December 2018 partition clauses.
    tb_partition_sql()
    pass
56 |
--------------------------------------------------------------------------------
/python_utils/sk_sc_pd_operator/pd_visualize_diamond.py:
--------------------------------------------------------------------------------
1 | # coding:utf-8
2 |
3 | """
4 | data set from
5 | https://github.com/tidyverse/ggplot2/blob/master/data-raw/diamonds.csv
6 | """
7 |
8 | import numpy as np
9 | import pandas as pd
10 | import matplotlib.pyplot as plt
11 | import seaborn as sns
12 | sns.set()
13 |
14 |
def load_data():
    """Explore the ggplot2 diamonds data set: clarity frequency table, a
    cut x clarity two-way table, and a stacked clarity-by-color bar chart.

    NOTE(review): reads a hard-coded Windows path and ends with a blocking
    plt.show() call — adjust before running elsewhere.
    """
    f_path = 'E:/python_code/tc_competition/diamond/diamonds.csv'
    df_data = pd.read_csv(f_path)

    print df_data.columns

    print df_data.describe()

    print df_data['clarity'].value_counts()

    # One-way frequency table of `clarity` (crosstab against a constant).
    my_tab = pd.crosstab(index=df_data["clarity"],  # Make a crosstab
                         columns="count")  # Name the count column

    # my_tab.plot.bar()
    # plt.show()

    print my_tab.sum()  # # Sum the counts

    print my_tab.shape  # Check number of rows and cols

    print my_tab.iloc[1:7]  # Slice rows 1-6

    # Relative frequencies (each count divided by the total).
    print my_tab / my_tab.sum()

    # df_data.boxplot(column="price",  # Column to plot
    #                 by="clarity",  # Column to split upon
    #                 figsize=(8, 8))  # Figure size

    # plt.show()
    print '=========================='
    # two-way table
    grouped = df_data.groupby(['cut', 'clarity'])
    print grouped.size()

    print '=========================='
    clarity_color_table = pd.crosstab(index=df_data["clarity"],
                                      columns=df_data["color"])

    print clarity_color_table

    # Stacked bars: one bar per clarity level, segmented by color counts.
    clarity_color_table.plot(kind="bar",
                             figsize=(8, 8),
                             stacked=True)
    plt.show()
59 |
60 |
if __name__ == '__main__':
    # Runs the full exploration; ends in a blocking plt.show().
    load_data()
    pass
64 |
--------------------------------------------------------------------------------
/python_utils/DbService/mysql_db/DbSubService.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 |
3 | from DbBase import DbBase
4 |
5 |
6 | class DbSubService(DbBase):
7 | def __init__(self, **kwargs):
8 | super(DbSubService, self).__init__(**kwargs)
9 |
10 | def query(self):
11 | pass
12 |
13 | def count(self, tb):
14 | """
15 | :param tb:
16 | :return: table rows count
17 | """
18 | query_sql = ' select count(*) from %s ' % tb
19 | self.cursor.execute(query_sql)
20 | res = self.cursor.fetchone()
21 | print res[0]
22 | return res[0]
23 |
24 | def get_liver_info(self, limit_start, limit_size):
25 | query = """select reported_uid,
26 | sum(audit_status='S01') as audit_status_S01,
27 | sum(audit_status='S02') as audit_status_S02,
28 | sum(audit_status='S03') as audit_status_S03,
29 | sum(audit_status='S04') as audit_status_S04,
30 | sum(audit_status='S05') as audit_status_S05,
31 | count(*) as audit_status_all from iboms.tb_ms_mobile_report_test
32 | group by reported_uid
33 | limit %s, %s""" % (limit_start, limit_size)
34 |
35 | self.cursor.execute(query)
36 | return [row for row in self.cursor]
37 |
38 | def bulk_update(self, lst):
39 | """
40 | batch updates
41 | [("new_value" , "3"),("new_value" , "6")]
42 | :param lst:
43 | :return:
44 | """
45 | query = """UPDATE Writers SET Name = %s WHERE Id = %s"""
46 | self.cursor.executemany(query, lst)
47 | self.conn.commit()
48 |
49 |
if __name__ == '__main__':
    # Count rows of tb_test using credentials from the JSON config file.
    db = DbSubService(db_config_file='../config/mysql_config.json')
    tb = 'tb_test'
    db.count(tb)
54 |
--------------------------------------------------------------------------------
/python_utils/numpy_operate/arr_sort.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 | import numpy as np
3 |
4 |
def arr_arg_sort():
    """np.argsort on a shuffled 2-D array and on 1-D arrays, both directions."""
    arr = np.random.permutation(3 * 4).reshape(3, 4)
    np.random.shuffle(arr)
    print(arr)
    print(np.argsort(arr))  # default: sort order within each row
    # print np.array(arr)[arr_sort_idx] # does not apply to mul-dim array
    print(np.argsort(arr, axis=0))  # axis=0: sort order within each column
    x = np.array([0, 2, 1])
    print(np.argsort(x))
    # [0 2 1] ascending order
    print(np.argsort(-x))
    # [1 2 0] descending order

    arr = np.array([4, 1, 3, 5])
    print('%s %s' % (arr, arr[arr.argsort()]))
    # [4 1 3 5] [1 3 4 5]
    print(arr.argsort())
    # [1 2 0 3]
    print('%s %s' % (np.argsort(-arr), arr[np.argsort(-arr)]))
    # [3 0 2 1], [5 4 3 1]
27 |
28 |
def arr_sort():
    """In-place arr.sort() vs np.sort(), row-wise and column-wise."""
    arr = np.random.permutation(3 * 4).reshape(3, 4)
    print(arr)
    arr.sort()  # sorts each row in place
    print('%s %s' % ('after sort \n', arr))
    print(np.sort(arr))  # same result as arr.sort(); row-wise by default
    print(np.sort(arr, axis=0))  # axis=0 sorts each column
43 |
44 |
def arr_sum():
    """Axis-wise sums, a boolean mask, and row reversal on a 2x3 array."""
    arr = np.arange(6).reshape((2, 3))
    print(arr)
    # [[0 1 2]
    #  [3 4 5]]
    print(arr.sum(axis=0))  # column sums -> [3 5 7]
    print(arr.sum(axis=1))  # row sums -> [ 3 12]

    print(arr > 1)  # element-wise boolean mask
    print(arr[::-1])  # rows in reverse order
61 |
if __name__ == '__main__':
    # Only the argsort demo runs by default.
    # arr_sum()
    arr_arg_sort()
    # arr_sort()
    pass
67 |
--------------------------------------------------------------------------------
/python_utils/py_basic/operator_ope.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 | """
3 | python operator module usage, cited from
4 | https://docs.python.org/2/library/operator.html
5 | """
6 |
7 | import operator
8 |
9 |
def cmp_fun():
    """operator.le / operator.gt mirror the <= and > operators."""
    a, b = 5, 3
    print(operator.le(a, b))
    # False
    print(operator.gt(a, b))
    # True
16 |
17 |
def lst_ope():
    """operator.indexOf (first position) and operator.countOf (occurrences)."""
    print(operator.indexOf([1, 2, 3], 2))
    # 1
    print(operator.countOf([1, 2, 3, 2], 2))
    # 2
25 |
26 |
def cal_ope():
    """Element-wise multiply via map(operator.mul), dot product, and iadd."""
    lst1 = [0, 1, 2, 3]
    lst2 = [10, 20, 30, 40]
    print(list(map(operator.mul, lst1, lst2)))
    # [0, 20, 60, 120]

    # dot product: sum of pairwise products
    print(sum(map(operator.mul, lst1, lst2)))
    # 200

    a, b = 1, 3
    # iadd on immutable ints is plain addition
    print(operator.iadd(a, b))
    # 4
39 |
40 |
def item_ope():
    """getitem / itemgetter, and itemgetter as a sort key."""
    letters = ['h', 'e', 'l', 'l', 'o']
    print(operator.getitem(letters, 1))
    # e
    # itemgetter with several indices returns a tuple
    print(operator.itemgetter(1, 4)(letters))
    # ('e', 'o')

    inventory = [('apple', 3), ('banana', 2), ('pear', 5), ('orange', 1)]
    get_count = operator.itemgetter(1)
    print(list(map(get_count, inventory)))
    # [3, 2, 5, 1]

    # sort by the count field
    print(sorted(inventory, key=get_count))
    # [('orange', 1), ('banana', 2), ('apple', 3), ('pear', 5)]
56 |
def reduce_ope():
    """Folding a list with reduce: lambda vs operator, xor, and a start value.

    Imports reduce from functools so the demo also runs on Python 3, where
    the builtin was removed; on Python 2, functools.reduce is the same
    function as the builtin, so behaviour is unchanged.
    """
    from functools import reduce

    a = [2, 3, 4, 5]
    print(reduce(lambda x, y: x + y, a))
    # 14
    print(reduce(operator.add, a))
    # 14

    lst = [3, 2, 3]
    print(reduce(operator.xor, lst))
    # 2

    # use reduce with init value, sum from init
    lst = [1, 2, 3]
    print(reduce(operator.add, lst, 10))
    # 16
72 |
73 |
if __name__ == '__main__':
    # Only the reduce demo runs by default.
    reduce_ope()
    # item_ope()
    # cal_ope()
    # lst_ope()
    # cmp_fun()
    pass
81 |
--------------------------------------------------------------------------------
/python_utils/machine_learn/dataset/cluster/cluster_txt:
--------------------------------------------------------------------------------
1 | 1.658985 4.285136
2 | -3.453687 3.424321
3 | 4.838138 -1.151539
4 | -5.379713 -3.362104
5 | 0.972564 2.924086
6 | -3.567919 1.531611
7 | 0.450614 -3.302219
8 | -3.487105 -1.724432
9 | 2.668759 1.594842
10 | -3.156485 3.191137
11 | 3.165506 -3.999838
12 | -2.786837 -3.099354
13 | 4.208187 2.984927
14 | -2.123337 2.943366
15 | 0.704199 -0.479481
16 | -0.392370 -3.963704
17 | 2.831667 1.574018
18 | -0.790153 3.343144
19 | 2.943496 -3.357075
20 | -3.195883 -2.283926
21 | 2.336445 2.875106
22 | -1.786345 2.554248
23 | 2.190101 -1.906020
24 | -3.403367 -2.778288
25 | 1.778124 3.880832
26 | -1.688346 2.230267
27 | 2.592976 -2.054368
28 | -4.007257 -3.207066
29 | 2.257734 3.387564
30 | -2.679011 0.785119
31 | 0.939512 -4.023563
32 | -3.674424 -2.261084
33 | 2.046259 2.735279
34 | -3.189470 1.780269
35 | 4.372646 -0.822248
36 | -2.579316 -3.497576
37 | 1.889034 5.190400
38 | -0.798747 2.185588
39 | 2.836520 -2.658556
40 | -3.837877 -3.253815
41 | 2.096701 3.886007
42 | -2.709034 2.923887
43 | 3.367037 -3.184789
44 | -2.121479 -4.232586
45 | 2.329546 3.179764
46 | -3.284816 3.273099
47 | 3.091414 -3.815232
48 | -3.762093 -2.432191
49 | 3.542056 2.778832
50 | -1.736822 4.241041
51 | 2.127073 -2.983680
52 | -4.323818 -3.938116
53 | 3.792121 5.135768
54 | -4.786473 3.358547
55 | 2.624081 -3.260715
56 | -4.009299 -2.978115
57 | 2.493525 1.963710
58 | -2.513661 2.642162
59 | 1.864375 -3.176309
60 | -3.171184 -3.572452
61 | 2.894220 2.489128
62 | -2.562539 2.884438
63 | 3.491078 -3.947487
64 | -2.565729 -2.012114
65 | 3.332948 3.983102
66 | -1.616805 3.573188
67 | 2.280615 -2.559444
68 | -2.651229 -3.103198
69 | 2.321395 3.154987
70 | -1.685703 2.939697
71 | 3.031012 -3.620252
72 | -4.599622 -2.185829
73 | 4.196223 1.126677
74 | -2.133863 3.093686
75 | 4.668892 -2.562705
76 | -2.793241 -2.149706
77 | 2.884105 3.043438
78 | -2.967647 2.848696
79 | 4.479332 -1.764772
80 | -4.905566 -2.911070
--------------------------------------------------------------------------------
/python_utils/numpy_operate/algebra_op.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 | """
3 | This file is about linear algebra operator
4 | """
5 | import numpy as np
6 |
7 |
def mean():
    """np.mean over different axes of a 3x4 range array, plus broadcasting."""
    arr = np.arange(12).reshape((3, 4))
    print(arr)
    # [[ 0 1 2 3]
    #  [ 4 5 6 7]
    #  [ 8 9 10 11]]
    col_means = np.mean(arr, axis=0)
    print(col_means)
    # [ 4. 5. 6. 7.]

    # broadcasting subtracts the column mean from every row
    print(arr - col_means)
    print(np.mean(arr))  # grand mean: 5.5
    print(np.mean(arr, axis=1))  # row means: [ 1.5 5.5 9.5]
26 |
27 |
def covariance():
    """np.cov and np.var on a 4x3 range array and a small 2x2 example."""
    arr = np.arange(12).reshape((4, 3))
    print(arr)
    col_means = np.mean(arr, axis=0)
    print(col_means)
    print(np.cov(col_means, rowvar=0))
    centered = arr - col_means
    print(centered)
    # rowvar=0: columns are treated as variables, rows as observations
    print(np.cov(centered, rowvar=0))
    print(np.var(centered, 0))

    a = np.array([[1, 2], [3, 4]])
    print(np.var(a))  # 1.25: variance over every element
    print(np.var(a, 0))  # [ 1. 1.]: variance of every column
    print(np.var(a, 1))  # [ 0.25 0.25]: variance of every row
47 |
48 |
def eigen_vec_val():
    """Eigen-decomposition; reorder eigenvectors by eigenvalue order."""
    fixed = np.array([[-1, 0], [0, 1]])
    eig_val1, eig_vec1 = np.linalg.eig(fixed)
    # print eig_val1
    # print eig_vec1

    rand_mat = np.random.randint(1, 10, size=(3, 3))
    vals, vecs = np.linalg.eig(rand_mat)
    print(vals)
    print(vecs)
    order = np.argsort(vals)
    print(order)
    print(order[::-1])
    # eigenvectors are the columns, so reorder along axis 1
    print(vecs[:, order])
    print(vecs[:, order[::-1]])
64 |
if __name__ == '__main__':
    # Only the mean demo runs by default.
    mean()
    # covariance()
    # eigen_vec_val()
    pass
70 |
--------------------------------------------------------------------------------
/python_utils/numpy_operate/log2_op.py:
--------------------------------------------------------------------------------
1 | # coding:utf-8
2 |
3 | import numpy as np
4 |
5 |
def log2_test():
    """np.log2 element-wise, including log2(0) -> -inf."""
    arr = np.array([0, 1, 2, 3, 2 ** 4])
    print(np.log2(arr))  # first entry is -inf (numpy emits a warning)
    # [-inf, 0, 1, 1.5849625, 4]

    arr = np.array([1, 2, 4, 2 ** 3])
    arr_lg = np.log2(arr)
    print(arr_lg)
    # [ 0. 1. 2. 3.]
    print(arr_lg * arr)
    # [ 0. 2. 8. 24.]

    # weighted sum — the building block of an entropy calculation
    print(np.sum(arr_lg * arr))
    # 34
21 |
22 |
def cal_entropy():
    """Shannon entropy of two Bernoulli distributions via -sum(p*log2(p))."""
    uniform = np.array([0.5, 0.5])
    print(np.log2(uniform))
    # [-1. -1.]

    print(np.sum(np.log2(uniform) * uniform))
    # -1.0

    # a fair coin carries exactly one bit of entropy
    print(np.sum(-1 * np.log2(uniform) * uniform))
    # 1.0

    skewed = np.array([0.1, 0.9])
    print(np.sum(-1 * np.log2(skewed) * skewed))
    # 0.468995593589
37 |
38 |
class SoftmaxLayer:
    """Row-wise softmax forward pass plus the cross-entropy gradient."""

    def __init__(self, name='Softmax'):
        pass

    def forward(self, in_data):
        """Softmax of each row of in_data; caches the output for backward()."""
        # subtract each row's max before exp() for numerical stability
        shifted = in_data - np.max(in_data, axis=1).reshape(-1, 1)
        exps = np.exp(shifted)
        self.top_val = exps / np.sum(exps, axis=1).reshape(-1, 1)
        return self.top_val

    def backward(self, residual):
        """Gradient of the mean cross-entropy loss w.r.t. the softmax input.

        residual holds each row's integer class label.
        """
        n_rows = residual.shape[0]
        grad = self.top_val.copy()
        # d(loss)/d(input) = softmax - one_hot(label), averaged over rows
        grad[range(n_rows), list(residual)] -= 1
        grad /= n_rows
        return grad
54 |
55 |
def test_log():
    """Scratchpad for SoftmaxLayer and np.log (natural logarithm)."""
    arr1 = np.array([[-0.1, -0.2, -0.3], [0.1, 0.2, 0.4]])
    sl = SoftmaxLayer()
    # print sl.forward(arr1)

    # print np.max(arr1, axis=1).reshape(-1, 1)

    arr2 = np.array([[1, 2, 3], [-1, -2, -4]])
    # print sl.forward(arr2)

    # np.log is base-e: log(e) == 1, log(2) ~= 0.693
    arr_base = np.array([2, 2, np.e])
    arr_log = np.array([1, 2, 4])
    print np.log([arr_base, arr_log])
69 |
70 |
if __name__ == '__main__':
    # Only the log scratchpad runs by default.
    test_log()
    # log2_test()
    # cal_entropy()
    pass
76 |
--------------------------------------------------------------------------------
/python_utils/machine_learn/decision_tree/dtree_sklearn.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 |
3 | """
4 | data set from:
5 | http://archive.ics.uci.edu/ml/datasets/banknote+authentication
6 | http://archive.ics.uci.edu/ml/machine-learning-databases/00267/data_banknote_authentication.txt
7 | """
8 |
9 | from sklearn.datasets import load_iris
10 | from sklearn import tree
11 | from sklearn.tree import export_graphviz
12 | import subprocess
13 |
14 | from create_data import get_loan_data_lh
15 |
16 |
17 | def visualize_tree(tree, feature_name, dot_file):
18 | """Create tree png using graphviz.
19 | tree -- scikit-learn DecsisionTree.
20 | feature_names -- list of feature names.
21 | dot_file -- dot file name and path
22 | """
23 | with open("tree.dot", 'w') as f:
24 | export_graphviz(tree, out_file=f,
25 | feature_names=feature_name)
26 |
27 | dt_png = dot_file.replace('dot', 'png')
28 | command = ["dot", "-Tpng", dot_file, "-o", dt_png]
29 | try:
30 | subprocess.check_call(command)
31 | except Exception as e:
32 | print e
33 | exit("Could not run dot, ie graphviz, to "
34 | "produce visualization")
35 |
36 |
def iris_demo():
    """Fit a decision tree on iris and render it with graphviz."""
    iris = load_iris()
    # iris.data: 150x4 features; iris.target: classes encoded as 0/1/2 (150x1)
    clf = tree.DecisionTreeClassifier().fit(iris.data, iris.target)
    dot_file = 'tree.dot'
    tree.export_graphviz(clf, out_file=dot_file)
    visualize_tree(clf, iris.feature_names, dot_file)

    # (graph,) = pydot.graph_from_dot_file('tree.dot')
    # graph.write_png('somefile.png')
48 |
49 |
def loan_demo():
    """Fit a decision tree on the toy loan data and render it to loan.png."""
    X, Y = get_loan_data_lh()
    model = tree.DecisionTreeClassifier().fit(X, Y)
    dot_file = 'loan.dot'
    tree.export_graphviz(model, out_file=dot_file)
    feature_names = ['age', 'has work', 'own house', 'loan level']
    visualize_tree(model, feature_names, dot_file)
58 |
59 |
if __name__ == '__main__':
    # iris_demo()
    # Render the loan-data decision tree (needs graphviz on PATH).
    loan_demo()
    pass
64 |
--------------------------------------------------------------------------------
/python_utils/sk_sc_pd_operator/pd_concat_join.py:
--------------------------------------------------------------------------------
1 | # coding:utf-8
2 |
3 | import numpy as np
4 | import pandas as pd
5 |
6 |
def np_arr_concat():
    """np.concatenate on 1-D lists and on a 2-D array along both axes."""
    x, y, z = [1, 2, 3], [4, 5, 6], [7, 8, 9]
    print(np.concatenate([x, y, z]))
    # [1 2 3 4 5 6 7 8 9]

    grid = [[1, 2],
            [3, 4]]
    # default axis=0: stacked vertically
    print(np.concatenate([grid, grid]))
    # [[1 2]
    #  [3 4]
    #  [1 2]
    #  [3 4]]

    # axis=1: joined side by side
    print(np.concatenate([grid, grid], axis=1))
    # [[1 2 1 2]
    #  [3 4 3 4]]
26 |
27 |
def make_df(cols, idx):
    """
    Build a toy DataFrame whose cell (i, c) holds the string c + str(i).

    make_df('ABC', range(3)) ->
        A   B   C
    0  A0  B0  C0
    1  A1  B1  C1
    2  A2  B2  C2
    """
    cells = {}
    for col in cols:
        cells[col] = ['%s%s' % (col, row) for row in idx]

    return pd.DataFrame(cells, idx)
41 |
42 |
def series_concat():
    """pd.concat stacks Series while keeping their original indices."""
    first = pd.Series(['A', 'B', 'C'], index=[1, 2, 3])
    second = pd.Series(['D', 'E', 'F'], index=[4, 5, 6])
    print(pd.concat([first, second]))
    # 1 A
    # 2 B
    # 3 C
    # 4 D
    # 5 E
    # 6 F
    # dtype: object
54 |
55 |
def df_concat():
    """pd.concat on DataFrames: rows by default, columns with axis=1."""
    top = make_df('AB', [1, 2])
    bottom = make_df('AB', [3, 4])
    print(top)
    # 1 A1 B1 / 2 A2 B2
    print(bottom)
    # 3 A3 B3 / 4 A4 B4
    # row-wise concat keeps both frames' indices
    print(pd.concat([top, bottom]))
    # rows 1..4, columns A B

    left = make_df('AB', [0, 1])
    right = make_df('CD', [0, 1])

    print(left)
    # 0 A0 B0 / 1 A1 B1
    print(right)
    # 0 C0 D0 / 1 C1 D1
    # axis=1 aligns on the shared index and joins the columns
    print(pd.concat([left, right], axis=1))
    #     A   B   C   D
    # 0  A0  B0  C0  D0
    # 1  A1  B1  C1  D1
89 |
if __name__ == '__main__':
    # Only the DataFrame concat demo runs by default.
    df_concat()
    # series_concat()
    # np_arr_concat()
    pass
95 |
--------------------------------------------------------------------------------
/python_utils/machine_learn/neural_network_keras/cnn_keras_digits.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 |
3 | """
4 | This file is mainly use cnn to recognize digit
5 | """
6 |
7 | import numpy as np
8 | from keras.models import Sequential
9 | from keras.layers import Dense, Dropout, Flatten
10 | from keras.layers.convolutional import Conv2D
11 | from keras.layers.convolutional import MaxPooling2D
12 | from keras.utils import np_utils
13 | from keras import backend as K
14 | K.set_image_dim_ordering('th')
15 |
16 | from nn_keras_digits import load_data
17 |
18 |
def pre_process_data():
    """Load MNIST, reshape to [samples][1][28][28], scale to 0-1, one-hot labels."""
    (X_train, y_train), (X_test, y_test) = load_data()
    # reshape to [samples][channels][width][height] ('th' ordering) and
    # normalize pixel values from 0-255 down to 0-1
    X_train = X_train.reshape(X_train.shape[0], 1, 28, 28).astype('float32') / 255
    X_test = X_test.reshape(X_test.shape[0], 1, 28, 28).astype('float32') / 255
    # one hot encode outputs
    y_train = np_utils.to_categorical(y_train)
    y_test = np_utils.to_categorical(y_test)
    return X_train, y_train, X_test, y_test
33 |
34 |
def baseline_model():
    """Small convnet: conv(32,5x5) -> maxpool -> dropout -> dense 128 -> softmax 10."""
    layers = [
        Conv2D(32, (5, 5), input_shape=(1, 28, 28), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.2),
        Flatten(),
        Dense(128, activation='relu'),
        Dense(10, activation='softmax'),
    ]
    model = Sequential()
    for layer in layers:
        model.add(layer)
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model
45 |
46 |
def train_and_evaluate():
    """Train the convnet for 10 epochs and print the final test error."""
    X_train, y_train, X_test, y_test = pre_process_data()
    model = baseline_model()
    model.fit(X_train, y_train, validation_data=(X_test, y_test),
              epochs=10, batch_size=200, verbose=2)
    # scores[1] is accuracy; report the complementary error percentage
    scores = model.evaluate(X_test, y_test, verbose=0)
    print("Baseline Error: %.2f%%" % (100 - scores[1] * 100))
53 |
if __name__ == '__main__':
    # Full MNIST training run — slow without a GPU.
    train_and_evaluate()

    pass
58 |
--------------------------------------------------------------------------------
/python_utils/py_basic/obj_is.py:
--------------------------------------------------------------------------------
1 | # coding:utf-8
2 | import operator
3 |
4 |
def i_is_check():
    """`is` compares identity; equal int literals here share one object."""
    a = 256
    b = 256
    print(a is b)
    # True (CPython caches small ints)
    a1 = 2571111
    b1 = 2571111
    print(a1 is b1)
    # True here: both names bind the same folded constant of this code object
14 |
15 |
def max_activity(s, e):
    """Greedily pick non-overlapping activities, shortest duration first,
    and print the chosen (start, end) pairs joined by '->'.

    :param s: start times
    :param e: end times (paired with s by position)
    """
    remaining = sorted(zip(s, e), key=lambda t: t[1] - t[0])

    chosen = []
    while remaining:
        remaining.sort(key=lambda t: t[1] - t[0])
        shortest = remaining.pop(0)
        chosen.append(shortest)

        # keep only activities that end before, or start after, the pick
        remaining = [iv for iv in remaining
                     if iv[1] <= shortest[0] or iv[0] >= shortest[1]]

    print("->".join(str(item) for item in chosen))
31 |
32 |
def test_max_activity():
    """Two scenarios for the greedy activity picker."""
    starts = [1, 3, 0, 5, 3, 5, 6, 8, 8, 2]
    ends = [4, 5, 6, 7, 4, 6, 9, 10, 11, 5]
    max_activity(starts, ends)
    # (3, 4)->(5, 6)->(8, 10)

    starts = [1, 3, 0, 5, 3, 5, 6, 8, 8, 2, 12]
    ends = [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
    max_activity(starts, ends)
    # (3, 5)->(5, 7)->(12, 14)->(8, 11)
42 | # (3, 5)->(5, 7)->(12, 14)->(8, 11)
43 |
44 |
def coin_question(coin_val, coin_count, money):
    """Greedy change-making with limited coin supplies.

    Walks denominations from largest to smallest, taking as many of each
    coin as the remaining amount (and the available count) allows.

    :param coin_val: coin denominations, ascending
    :param coin_count: available count of each denomination (same order)
    :param money: target amount
    :return: dict {denomination: coins used}; may under-shoot `money`
             when the supply cannot reach it
    """
    total = 0  # renamed from `sum`, which shadowed the builtin
    d_val_count = {}

    for i in reversed(range(len(coin_val))):
        remaining = money - total
        # explicit floor division keeps integer semantics on Python 3 too
        n = min(coin_count[i], remaining // coin_val[i])
        if n:
            total += n * coin_val[i]
            d_val_count[coin_val[i]] = n

    return d_val_count
58 |
59 |
60 | def test_coin_question():
61 | coin_val = [1, 2, 5, 10, 20, 50, 100]
62 | coin_count = [3, 0, 2, 1, 0, 3, 5]
63 |
64 | money = 113
65 | d_val_count = coin_question(coin_val, coin_count, money)
66 | print d_val_count # {1: 3, 10: 1, 100: 1}
67 |
68 | money = 272
69 | d_val_count = coin_question(coin_val, coin_count, money)
70 | print d_val_count # {1: 2, 50: 1, 100: 2, 10: 1, 5: 2}
71 |
if __name__ == '__main__':
    test_coin_question()
    # test_max_activity()
    # i_is_check()
    lst = [1, 2, 3]
    # NOTE: Python 2 — range() returns a list, so it can be sliced directly
    print range(len(lst))[::-1]
    print lst[::-1]
    pass
80 |
--------------------------------------------------------------------------------
/python_utils/sk_sc_pd_operator/pd_GridSearchCV.py:
--------------------------------------------------------------------------------
1 | # coding:utf-8
2 |
3 | import numpy as np
4 | import pandas as pd
5 | from sklearn import svm, datasets
6 | from sklearn.model_selection import GridSearchCV
7 | from sklearn.metrics import classification_report
8 |
9 | import matplotlib.pyplot as plt
10 | import seaborn as sns
11 | sns.set()
12 |
13 |
def test_grid_search_cv():
    """Exhaustive (kernel, C, gamma) search on iris; dump CV results to CSV
    and print a classification report on the training data."""
    iris = datasets.load_iris()
    param_grid = {'kernel': ('linear', 'rbf'),
                  'C': [1, 2, 4], 'gamma': [0.125, 0.25, 0.5, 1, 2, 4]}

    searcher = GridSearchCV(svm.SVC(), param_grid, n_jobs=-1)
    searcher.fit(iris.data, iris.target)
    with open('cv_result.csv', 'w') as f:
        pd.DataFrame.from_dict(searcher.cv_results_).to_csv(f)

    print('The parameters of the best model are: ')
    print(searcher.best_params_)

    predictions = searcher.predict(iris.data)
    print(classification_report(y_true=iris.target, y_pred=predictions))
31 |
32 |
def grid_search_cv_graph():
    """Grid-search an SVC over (C, gamma) on the digits data, then plot
    mean CV score against gamma, one line per C value.
    """
    # NOTE(review): despite the variable name, this loads the digits set.
    iris = datasets.load_digits()
    X = iris.data
    Y = iris.target

    C_lst = [1, 10, 100, 1000]
    gamma_lst = [0.125, 0.25, 0.5, 1, 2, 4]
    # NOTE(review): this overwrites the gamma list above — only
    # [1e-3, 1e-4] is actually searched.
    gamma_lst = [1e-3, 1e-4]

    parameters = {'C': C_lst, 'gamma': gamma_lst}

    # parameters = {'kernel': ('linear', 'rbf'),
    #               'C': C_lst, 'gamma': gamma_lst}

    clf_ = svm.SVC()
    # cv=2 folds; n_jobs=-1 uses every core
    clf = GridSearchCV(clf_, parameters, cv=2, n_jobs=-1)
    clf.fit(X, Y)

    print clf.best_params_
    print clf.best_score_

    print clf.cv_results_

    # scores = [x[1] for x in clf.grid_scores_]
    scores = clf.cv_results_['mean_test_score']
    print scores
    # reshape the flat score list into a (C, gamma) grid for plotting
    scores = np.array(scores).reshape(len(C_lst), len(gamma_lst))

    for ind, i in enumerate(C_lst):
        plt.plot(gamma_lst, scores[ind], label='C: ' + str(i))

    plt.legend()
    plt.xlabel('Gamma')
    plt.ylabel('Mean score')
    plt.show()
68 |
if __name__ == '__main__':
    # test_grid_search_cv()
    # Grid-search the digits data and plot scores; ends in plt.show().
    grid_search_cv_graph()
    pass
73 |
--------------------------------------------------------------------------------
/python_utils/http_basic/http_client_get.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 |
3 | """
4 | This file a sample demo to do http stress test
5 | """
6 | import requests
7 | import time
8 | from multiprocessing.dummy import Pool as ThreadPool
9 | import urllib
10 |
11 |
def get_ret_from_http(url):
    """GET `url` and print the raw response body.
    cited from https://stackoverflow.com/questions/645312/what-is-the-quickest-way-to-http-get-in-python
    """
    response = requests.get(url)
    print(response.content)
    # eg. result: {"error":false,"resultMap":{"check_ret":1},"success":true}
18 |
19 |
def multi_process_stress_test():
    """
    Fire 100 GETs at the local checkBlack endpoint through 5 dummy
    (thread) workers and print the wall-clock time.
    :return:
    """
    started = time.time()
    url = """http://127.0.0.1:9325/shortvideo/checkBlack?url=http%3A%2F%2Fzbasecapture.bs2.yy.com%2F42269159_1499248536403_3.jpg&serial=abcdddddddd"""
    url1 = """http://127.0.0.1:9325/shortvideo/checkBlack?url=http%3A%2F%2Fgenie.bs2dl.yy.com%2Ff4955aa1ab1c479256e2a2c5cdec73a6&serial=abceeeeeeee"""
    targets = [url, url1] * 50
    pool = ThreadPool(5)
    pool.map(get_ret_from_http, targets)
    pool.close()
    pool.join()
    print('time consume %s' % (time.time() - started))
35 |
36 |
def make_url():
    """
    generate url with parameter
    https://xy.com/index.php?url=http%3A//xy.xxx.com/22.jpg&SecretId=xy_123_move
    cited from https://stackoverflow.com/questions/2506379/add-params-to-given-url-in-python
    https://github.com/gruns/furl a good util for url operator
    :return:
    """
    para = {"SecretId": "xy_123_move", "url": "http://xy.xxx.com/22.jpg"}

    print(urllib.urlencode(para))
    # url=http%3A%2F%2Fxy.xxx.com%2F22.jpg&SecretId=xy_123_move

    base_url = 'xy.com/index.php'

    # remember: the query string is attached with '?'
    query = '&'.join('%s=%s' % (k, urllib.quote(str(v))) for k, v in para.iteritems())
    return 'https://%s?%s' % (base_url, query)
54 |
55 |
if __name__ == '__main__':
    # get_ret_from_http()
    # multi_process_stress_test()

    print make_url()
    # s = "abc"
    s = "abc"
    # percent-encode a plain string (identity for 'abc')
    print urllib.quote(s)
    pass
65 |
--------------------------------------------------------------------------------
/python_utils/thread_process/pool_queue.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 | """
3 | This file light weight thread pool queue
4 | Mainly cited from
5 | `http://stackoverflow.com/questions/3033952/python-thread-pool-similar-to-the-multiprocessing-pool`
6 | """
7 | import threading
8 | import logging
9 | from time import sleep
10 | from random import randint
11 | from Queue import Queue
12 |
13 | logging.basicConfig(level=logging.DEBUG,
14 | format='%(levelname)s %(asctime)s %(threadName)s %(message)s',
15 | datefmt='%Y-%m-%d %I:%M:%S')
16 |
17 |
class Worker(threading.Thread):
    """Daemon thread that endlessly consumes (func, args, kwargs) tasks
    from a shared queue and executes them."""

    def __init__(self, task):
        super(Worker, self).__init__()
        self.task = task
        # daemon thread: dies with the main thread instead of keeping the
        # process alive forever
        self.daemon = True
        self.start()

    def run(self):
        while True:
            logging.debug('waiting for queue')
            job, pos_args, kw_args = self.task.get()
            try:
                logging.debug('now I am going to do task')
                job(*pos_args, **kw_args)
            except Exception as e:  # `except E, e` -> modern `as` form
                logging.warn(e)
            finally:
                # always mark the item done so queue.join() can return
                self.task.task_done()
36 |
37 |
class ThreadPool:
    """Minimal fixed-size thread pool backed by a bounded Queue.

    The queue's maxsize equals the worker count, so add_task() blocks
    (applies backpressure) once every worker is busy and the queue is full.
    """
    def __init__(self, num_thread):
        self.tasks = Queue(num_thread)
        for w in xrange(num_thread):
            Worker(self.tasks)

    def add_task(self, func, *args, **kwargs):
        # blocks when the bounded queue is full
        self.tasks.put((func, args, kwargs))
        pass

    def wait_completion(self):
        """
        the corresponding consume thread should be a daemon thread,
        so it can exit automatically
        :return:
        """
        self.tasks.join()
        pass
56 |
57 |
def handler(sec):
    """Demo pool task: log the requested duration, then block for `sec` seconds."""
    logging.debug('now I will sleep %s S', sec)
    sleep(sec)
61 |
62 |
def test():
    """Exercise the pool: queue 20 random sleep jobs on 5 workers and wait."""
    lst_sleep_sec = [randint(5, 20) for i in xrange(20)]
    pool = ThreadPool(5)
    for sec in lst_sleep_sec:
        pool.add_task(handler, sec)

    pool.wait_completion()
    pass
71 |
if __name__ == '__main__':
    # run the thread-pool demo (takes up to ~20s per slowest job)
    test()
    pass
75 |
--------------------------------------------------------------------------------
/python_utils/sk_sc_pd_operator/pd_ope.py:
--------------------------------------------------------------------------------
1 | # coding:utf-8
2 |
3 | import numpy as np
4 | import pandas as pd
5 |
6 |
def idx_align_series():
    """Show pandas Series index alignment: '+' aligns on index labels and
    yields NaN for labels present in only one operand; Series.add with
    fill_value substitutes a default for the missing side instead."""
    A = pd.Series([2, 4, 6], index=[0, 1, 2])
    B = pd.Series([1, 3, 5], index=[1, 2, 3])

    print A + B
    # 0    NaN
    # 1    5.0
    # 2    9.0
    # 3    NaN
    # dtype: float64

    print A.add(B, fill_value=0)
    # 0    2.0
    # 1    5.0
    # 2    9.0
    # 3    5.0
    # dtype: float64
24 |
25 |
def idx_align_df():
    """Show DataFrame alignment on both rows and columns: '+' produces NaN
    wherever a row/column pair exists in only one frame; add(fill_value=...)
    uses the given value for the missing cells. Operands are not mutated."""
    rng = np.random.RandomState(42)  # fixed seed so the printed tables reproduce
    df1 = pd.DataFrame(rng.randint(0, 20, (2, 2)),
                       columns=list('AB'))
    print df1
    #     A   B
    # 0   6  19
    # 1  14  10
    df2 = pd.DataFrame(rng.randint(0, 10, (3, 3)),
                       columns=list('BAC'))

    print df2
    #    B  A  C
    # 0  7  4  6
    # 1  9  2  6
    # 2  7  4  3

    print df1 + df2
    #       A     B   C
    # 0  10.0  26.0 NaN
    # 1  16.0  19.0 NaN
    # 2   NaN   NaN NaN

    # stack() flattens df1 so mean() is over all four cells
    fill = df1.stack().mean()
    print fill
    # 12.25
    print df1.add(df2, fill_value=fill)
    #        A      B      C
    # 0  10.00  26.00  18.25
    # 1  16.00  19.00  18.25
    # 2  16.25  19.25  15.25

    print df1
    #     A   B
    # 0   6  19
    # 1  14  10
62 |
63 |
def row_col_ope():
    """Show row/column-wise arithmetic: numpy and DataFrame both broadcast
    a subtracted row across all rows; DataFrame.subtract(axis=0) broadcasts
    a column instead. The source frame is never modified."""
    rng = np.random.RandomState(42)  # fixed seed so the printed arrays reproduce
    arr = rng.randint(10, size=(3, 4))
    print arr
    # [[6 3 7 4]
    #  [6 9 2 6]
    #  [7 4 3 7]]
    print arr - arr[0]
    # [[ 0  0  0  0]
    #  [ 0  6 -5  2]
    #  [ 1  1 -4  3]]
    df = pd.DataFrame(arr, columns=list('QRST'))
    print df - df.iloc[0]
    #    Q  R  S  T
    # 0  0  0  0  0
    # 1  0  6 -5  2
    # 2  1  1 -4  3
    # axis=0 aligns the Series with the row index, i.e. subtract per column
    print df.subtract(df['R'], axis=0)
    #    Q  R  S  T
    # 0  3  0  4  1
    # 1 -3  0 -7 -3
    # 2  3  0 -1  3

    print df  # no change
88 |
if __name__ == '__main__':
    # run one alignment/broadcast demo at a time
    row_col_ope()
    # idx_align_df()
    # idx_align_series()
    pass
94 |
--------------------------------------------------------------------------------
/python_utils/machine_learn/knearest/knn_classify_sklearn.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 |
3 | """
4 | use sklearn to do knn prediction
5 |
6 | data set from: https://archive.ics.uci.edu/ml/datasets/Iris
7 | mainly cited from the below blog:
8 | https://kevinzakka.github.io/2016/07/13/k-nearest-neighbor/
9 | """
10 |
11 | import numpy as np
12 | from sklearn.metrics import accuracy_score
13 | from sklearn.neighbors import KNeighborsClassifier
14 | from sklearn.model_selection import train_test_split, cross_val_score
15 | import pandas as pd
16 | import matplotlib.pyplot as plt
17 |
18 |
19 | def load_data():
20 | names = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'class']
21 | # loading training data
22 | path = '../dataset/knn/iris_data.txt'
23 | df = pd.read_csv(path, header=None, names=names)
24 | # print df.head()
25 | x = np.array(df.ix[:, 0: 4])
26 | y = np.array(df['class'])
27 |
28 | print x.shape, y.shape
29 | # x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.33, random_state=40)
30 | return train_test_split(x, y, test_size=0.33, random_state=40)
31 |
32 |
def predict():
    """Fit a k=3 KNN classifier on the iris training split and print the
    accuracy on the held-out test split."""
    x_train, x_test, y_train, y_test = load_data()
    k = 3
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(x_train, y_train)
    pred = knn.predict(x_test)
    print accuracy_score(y_test, pred)
40 |
41 |
def cross_validation():
    """Grid-search k in 1..29 via 10-fold cross validation on the training
    split, print the k with the lowest misclassification error, and plot
    mean CV accuracy against k."""
    x_train, x_test, y_train, y_test = load_data()
    k_lst = list(range(1, 30))
    lst_scores = []

    for k in k_lst:
        knn = KNeighborsClassifier(n_neighbors=k)
        scores = cross_val_score(knn, x_train, y_train, cv=10, scoring='accuracy')
        lst_scores.append(scores.mean())

    # changing to misclassification error
    MSE = [1 - x for x in lst_scores]
    optimal_k = k_lst[MSE.index(min(MSE))]
    print "The optimal number of neighbors is %d" % optimal_k
    # plot misclassification error vs k
    # plt.plot(k_lst, MSE)
    # plt.ylabel('Misclassification Error')
    plt.plot(k_lst, lst_scores)
    plt.xlabel('Number of Neighbors K')
    plt.ylabel('correct classification rate')
    plt.show()
63 |
if __name__ == '__main__':
    # run one KNN demo at a time
    # load_data()
    predict()
    # cross_validation()
    pass
69 |
--------------------------------------------------------------------------------
/python_utils/py_basic/base64_test.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 | """
3 | This file about some operator of base64 in python.
4 | eg. image to base64 string, and then the base64 string to image
5 | For a http issue, we may want to send image directly not url, for sake of
6 | unvisited url by other server
7 | cited from https://stackoverflow.com/questions/3715493/encoding-an-image-file-with-base64
8 |
9 | base64 decode exception
10 | https://stackoverflow.com/questions/12315398/verify-is-a-string-is-encoded-in-base64-python
11 | """
12 | import base64
13 | import requests
14 | import binascii
15 | import time
16 |
17 |
def img_base64():
    """Read a local image as bytes and print the length and content of its
    base64 encoding (demo; hard-coded local path)."""
    img_path = 'F:/img_test/dl_img_text_recognition/online_1.jpg'
    with open(img_path, 'rb') as img_file:
        b64_str = base64.b64encode(img_file.read())
        print len(b64_str)
        # 55932
        print b64_str
        # /9j/4AAQSkZ.............
26 |
27 |
def img_url_base64():
    """Download an image over HTTP and print its base64 encoding.

    The repeated `url = ...` assignments are alternative samples; only the
    last one is fetched.
    """
    url = 'http://i2.chinanews.com/simg/hd/2017/05/15/b3e10469cc0b4b84b2e9cedbb800cd3a.jpg'
    url = 'http://yysnapshot.bs2ctl7.yy.com/68a14b739dac400d1d1898327478a556b52260ec?height=720&interval=12402&file=68a14b739dac400d1d1898327478a556b52260ec&width=1280&bucket=yysnapshot&yid=7841950807447568392&day=20170820&t=1503163828000&streamid=7841950807452359080&id=3228019745205722527&size2=320&p=1'
    url = 'http://imgcache.qq.com/open_proj/proj_qcloud_v2/gateway/portal/css/img/home/qcloud-logo-dark.png'
    b64_str = base64.b64encode(requests.get(url).content)
    print len(b64_str)
    print b64_str
35 |
36 |
def base64_exception():
    """Show that decoding a non-base64 string raises binascii.Error, which
    should be caught explicitly rather than with a bare Exception.

    NOTE(review): base64.decodestring is a deprecated alias (removed in
    Python 3.9); decodebytes/b64decode is the modern spelling.
    """
    s_non_b64 = 'not base64 str 123 456 '
    try:
        print base64.decodestring(s_non_b64)
    # except Exception as e:
    except binascii.Error as e:
        # you'd better catch exception
        print "base64 decode error %s " % e
45 |
46 |
def b64_test():
    """Micro-benchmark: time base64 encode then decode of a ~300KB string.

    NOTE(review): time.clock() is deprecated (removed in Python 3.8) and its
    meaning is platform-dependent; time.time()/timeit would be preferable.
    """
    import time
    # test 300kb string decode time
    s = 'a' * 1024 * 300
    start = time.clock()
    b64_str = base64.b64encode(s)
    print time.clock() - start
    start = time.clock()
    base64.decodestring(b64_str)
    print time.clock() - start
57 |
if __name__ == '__main__':
    # run one base64 demo at a time
    # img_base64()
    # img_url_base64()
    # base64_exception()
    b64_test()
    pass
64 |
--------------------------------------------------------------------------------
/python_utils/sk_sc_pd_operator/pd_dummy_val.py:
--------------------------------------------------------------------------------
1 | # coding:utf-8
2 |
3 |
4 | import pandas as pd
5 |
6 |
def pd_dummy_val_1():
    """One-hot encode a categorical column ('sex') with pd.get_dummies and
    join the indicator columns back onto the original frame."""
    raw_data = {'first_name': ['Jason', 'Molly', 'Tina', 'Jake', 'Amy'],
                'last_name': ['Miller', 'Jacobson', 'Ali', 'Milner', 'Cooze'],
                'sex': ['male', 'female', 'male', 'female', 'female']}

    df = pd.DataFrame(raw_data, columns=['first_name', 'last_name', 'sex'])
    print df

    #   first_name last_name     sex
    # 0      Jason    Miller    male
    # 1      Molly  Jacobson  female
    # 2       Tina       Ali    male
    # 3       Jake    Milner  female
    # 4        Amy     Cooze  female

    # Create a set of dummy variables from the sex variable
    df_sex = pd.get_dummies(df['sex'])
    # Join the dummy variables to the main dataframe
    df_new = pd.concat([df, df_sex], axis=1)
    print df_new
    #   first_name last_name     sex  female  male
    # 0      Jason    Miller    male     0.0   1.0
    # 1      Molly  Jacobson  female     1.0   0.0
    # 2       Tina       Ali    male     0.0   1.0
    # 3       Jake    Milner  female     1.0   0.0
    # 4        Amy     Cooze  female     1.0   0.0
33 |
def pd_dummy_val_2():
    """Same one-hot pattern as pd_dummy_val_1 but for a 7-value category
    ('day'); note get_dummies orders the indicator columns alphabetically."""
    raw_data = {"work_hour": [9, 9, 9, 9, 9, 9, 6],
                "day": ["mon", "tus", "wend", "thur", "fri", "sta", "sun"]}

    df = pd.DataFrame(raw_data, columns=['work_hour', 'day'])

    print df
    #    work_hour   day
    # 0          9   mon
    # 1          9   tus
    # 2          9  wend
    # 3          9  thur
    # 4          9   fri
    # 5          9   sta
    # 6          6   sun

    df_day = pd.get_dummies(df['day'])

    df_new = pd.concat([df, df_day], axis=1)

    print df_new
    #    work_hour   day  fri  mon  sta  sun  thur  tus  wend
    # 0          9   mon  0.0  1.0  0.0  0.0   0.0  0.0   0.0
    # 1          9   tus  0.0  0.0  0.0  0.0   0.0  1.0   0.0
    # 2          9  wend  0.0  0.0  0.0  0.0   0.0  0.0   1.0
    # 3          9  thur  0.0  0.0  0.0  0.0   1.0  0.0   0.0
    # 4          9   fri  1.0  0.0  0.0  0.0   0.0  0.0   0.0
    # 5          9   sta  0.0  0.0  1.0  0.0   0.0  0.0   0.0
    # 6          6   sun  0.0  0.0  0.0  1.0   0.0  0.0   0.0
63 |
if __name__ == '__main__':
    # run one dummy-variable demo at a time
    pd_dummy_val_1()
    # pd_dummy_val_2()
    pass
68 |
--------------------------------------------------------------------------------
/python_utils/http_basic/http_realize/static_server/static_server.py:
--------------------------------------------------------------------------------
1 | import sys, os, BaseHTTPServer
2 |
3 |
class ServerException(Exception):
    '''For internal error reporting.'''
    pass
7 |
8 |
class CaseNoFile(object):
    """Case handler: the requested path does not exist on disk."""

    @staticmethod
    def test(handler):
        # True when nothing (file or directory) exists at the resolved path
        return not os.path.exists(handler.full_path)

    @staticmethod
    def act(handler):
        # BUG FIX: the message used to be wrapped in a set literal {...},
        # so the exception carried a one-element set instead of a string.
        raise ServerException("'{0}' not found".format(handler.full_path))
17 |
18 |
class CaseExistFile(object):
    """Case handler: the resolved path is an existing regular file."""

    @staticmethod
    def test(handler):
        # match plain files only; directories fall through to another case
        return os.path.isfile(handler.full_path)

    @staticmethod
    def act(handler):
        # delegate reading the file and writing the response
        handler.handle_file(handler.full_path)
27 |
28 |
class CaseError(object):
    """Fallback case: always matches; must be last in the cases list."""

    @staticmethod
    def test(handler):
        # unconditional match -- catches anything the earlier cases skipped
        return True

    @staticmethod
    def act(handler):
        raise ServerException("'{0}' unknown object".format(handler.full_path))
37 |
38 |
class RequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):
    """Serve GET requests by dispatching to the first matching case class.

    Each case class exposes test(handler) -> bool and act(handler); the
    first case whose test() passes handles the request.
    """

    # order matters: first match wins, CaseError must be last
    CasesLst = [CaseNoFile, CaseExistFile, CaseError]

    Error_Page = """\


Error accessing {path}
{msg}


"""

    def do_GET(self):
        """Resolve the URL to a filesystem path and run the first matching case."""
        try:
            self.full_path = os.getcwd() + self.path
            for case in self.CasesLst:
                if case.test(self):
                    case.act(self)
                    break

        except Exception as msg:
            self.handle_error(msg)

    def handle_file(self, path):
        """Send the raw bytes of `path`, or an error page if it can't be read."""
        try:
            with open(path, 'rb') as reader:
                content = reader.read()
            self.send_content(content)
        except IOError as msg:
            msg = "'{0}' cannot be read: {1}".format(self.path, msg)
            self.handle_error(msg)

    def handle_error(self, msg):
        """Render the error template and send it with an error status."""
        content = self.Error_Page.format(path=self.path, msg=msg)
        # BUG FIX: error pages used to be sent with HTTP 200; report 404.
        self.send_content(content, status=404)

    def send_content(self, content, status=200):
        """Write a complete HTTP response; `status` defaults to 200 so all
        existing callers keep their behaviour."""
        self.send_response(status)
        self.send_header("Content-type", "text/html")
        self.send_header("Content-Length", str(len(content)))
        self.end_headers()
        self.wfile.write(content)
82 |
if __name__ == '__main__':
    # serve the current working directory on all interfaces, port 8888
    serverAddress = ('', 8888)
    server = BaseHTTPServer.HTTPServer(serverAddress, RequestHandler)
    server.serve_forever()
87 |
--------------------------------------------------------------------------------
/python_utils/numpy_operate/idx_arrays.py:
--------------------------------------------------------------------------------
1 | # coding:utf-8
2 |
3 | import numpy as np
4 |
5 |
def single_idx():
    """Demo of 1-D numpy indexing: scalar/negative indices, slices with
    start:stop:step, and boolean-mask selection."""
    arr = np.arange(6)
    print arr
    # [0 1 2 3 4 5]
    print arr[1], arr[-1]
    # 1 5
    arr2 = np.arange(10)
    print arr2
    # [0 1 2 3 4 5 6 7 8 9]
    print arr2[2: 5], arr2[:-7], arr2[1: 7: 2]
    # [2 3 4] [0 1 2] [1 3 5]

    # comparison yields an elementwise boolean mask
    b = arr2 > 7
    print b
    # [False False False False False False False False  True  True]
    print arr2[b]
    # [8 9]
23 |
24 |
def multidimen_idx():
    """Demo of 2-D numpy indexing: tuple indices, row slicing with steps,
    fancy (integer-array) indexing, and boolean masks."""
    arr = np.arange(6)
    print arr
    # [0 1 2 3 4 5]
    # reshape in place by assigning to .shape
    arr.shape = (2, 3)
    print arr
    # [[0 1 2]
    #  [3 4 5]]

    print arr[1, 1], arr[1, -1]
    # 4 5
    print arr[1], arr[1][1]
    # [3 4 5] 4

    arr2 = np.arange(35).reshape(5, 7)
    print arr2
    # [[ 0  1  2  3  4  5  6]
    #  [ 7  8  9 10 11 12 13]
    #  [14 15 16 17 18 19 20]
    #  [21 22 23 24 25 26 27]
    #  [28 29 30 31 32 33 34]]

    print arr2[1:5:2]
    # [[ 7  8  9 10 11 12 13]
    #  [21 22 23 24 25 26 27]]

    # 1:5:2 means row 2, 4, ::3 means every 3 column
    print arr2[1:5:2, ::3]
    # [[ 7 10 13]
    #  [21 24 27]]

    # fancy indexing on rows combined with a column slice
    print arr2[np.array([0, 2, 4]), 1:3]
    # [[ 1  2]
    #  [15 16]
    #  [29 30]]

    # paired index arrays select elements (0,0), (2,1), (4,2)
    print arr2[np.array([0, 2, 4]), np.array([0, 1, 2])]
    # [ 0 15 30]

    print arr2[np.array([0, 2, 4]), 1]
    # [ 1 15 29]

    # boolean mask flattens the selection to 1-D
    b = arr2 > 20
    print arr2[b]
    # [21 22 23 24 25 26 27 28 29 30 31 32 33 34]
70 |
71 |
def n_dimension_arr():
    """Demo: reshape a 1-D range into a 3-D (2, 3, 5) array and print it."""
    arr = np.arange(30).reshape(2, 3, 5)
    print arr
    # [[[ 0  1  2  3  4]
    #   [ 5  6  7  8  9]
    #   [10 11 12 13 14]]
    #
    #  [[15 16 17 18 19]
    #   [20 21 22 23 24]
    #   [25 26 27 28 29]]]
82 |
83 |
def assign_val():
    """Demo: slice assignment broadcasts a scalar or copies a same-length
    sequence into the selected range."""
    arr = np.arange(10)
    print arr
    # [0 1 2 3 4 5 6 7 8 9]
    arr[2:7] = 1
    print arr
    # [0 1 1 1 1 1 1 7 8 9]
    arr[2:7] = range(5)
    print arr
    # [0 1 0 1 2 3 4 7 8 9]
94 |
95 |
def np_argmax():
    """Demo: np.argmax returns the index of the largest element (1 here)."""
    arr = np.array([1, 5, 3])
    print np.argmax(arr)
99 |
100 |
if __name__ == '__main__':
    # run one indexing demo at a time
    np_argmax()
    # assign_val()
    # n_dimension_arr()
    # single_idx()
    # multidimen_idx()
    pass
108 |
--------------------------------------------------------------------------------
/python_utils/thread_process/thread_lock.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 |
3 | """
4 | This file is about operator of threading.Lock
5 | using which we can realize a thread safe counter
6 | """
7 |
8 | import logging
9 | import threading
10 | import random
11 | import time
12 |
13 |
14 | logging.basicConfig(level=logging.DEBUG,
15 | format='(%(asctime)s %(threadName)-10s) %(message)s',
16 | datefmt='%Y-%m-%d %I:%M:%S')
17 |
18 |
class CounterThreadSafe(threading.Thread):
    """Counter whose increments are serialised by a threading.Lock.

    NOTE(review): inheriting from threading.Thread looks unnecessary here --
    instances are only shared between threads, never start()ed themselves.
    Kept for interface compatibility.
    """
    def __init__(self, start=0):
        super(CounterThreadSafe, self).__init__()
        self.val = start              # current counter value
        self.lock = threading.Lock()  # guards every mutation of self.val

    def inc(self, num):
        """Add `num` to the counter using explicit acquire/release."""
        logging.debug('wanting for lock, before num is %s val is %s', num, self.val)
        # BUG FIX: acquire() must happen *before* the try block. In the old
        # code an exception raised before acquiring made the finally clause
        # release an unheld lock, raising a spurious error that masked the
        # original one.
        self.lock.acquire()
        try:
            self.val += num
            logging.debug('after counter val is %s', self.val)
        finally:
            self.lock.release()

    def inc_v2(self, num):
        """Same as inc() but with the idiomatic `with` statement."""
        logging.debug('wanting for lock, before num is %s val is %s', num, self.val)
        with self.lock:
            self.val += num
            logging.debug('after counter val is %s', self.val)
39 |
# running total of the seconds handed to counters; written without a lock
# and only read in the main thread after all workers have been joined
g_sum = 0
41 |
42 |
def do_counter(counter):
    """Worker body: twice, sleep a random 1-3s and add that amount both to
    the global g_sum and to the shared thread-safe counter.

    NOTE(review): `g_sum += sleep_sec` itself is unsynchronised; with
    concurrent writers updates could interleave -- acceptable for this demo.
    """
    global g_sum
    for i in xrange(0, 2):
        sleep_sec = random.randint(1, 3)
        logging.debug('now sleeping %s S', sleep_sec)
        g_sum += sleep_sec
        time.sleep(sleep_sec)
        # counter.inc(sleep_sec)
        counter.inc_v2(sleep_sec)
52 |
53 |
def test_multi_thread_counter():
    """Start 5 threads that all increment one shared CounterThreadSafe."""
    counter = CounterThreadSafe()

    for i in xrange(0, 5):
        t = threading.Thread(target=do_counter, args=(counter, ))
        t.start()
        # t.join()  # can't join in this row or it will block the main thread

    logging.debug('start all thread.....done')
63 |
64 |
def join_all_others_thread():
    """Block until every thread except the main thread has finished."""
    logging.debug('now join all the other threads')
    main_thread = threading.currentThread()
    for t in threading.enumerate():
        # never join the main thread -- joining yourself deadlocks
        if t is not main_thread:
            t.join()

    # the following msg will be print after all the other thread done
    logging.debug('join all the other threads success')
74 |
75 |
if __name__ == '__main__':
    # start the workers, then wait for all of them before reading g_sum
    test_multi_thread_counter()
    join_all_others_thread()
    # the following msg will be print after all the other thread done
    logging.debug('all the sub threads done')
    logging.debug('g_sum is %s', g_sum)
    pass
83 |
--------------------------------------------------------------------------------
/python_utils/machine_learn/dataset/perception/dataset.txt:
--------------------------------------------------------------------------------
1 | -0.017612 14.053064 0
2 | -1.395634 4.662541 1
3 | -0.752157 6.538620 0
4 | -1.322371 7.152853 0
5 | 0.423363 11.054677 0
6 | 0.406704 7.067335 1
7 | 0.667394 12.741452 0
8 | -2.460150 6.866805 1
9 | 0.569411 9.548755 0
10 | -0.026632 10.427743 0
11 | 0.850433 6.920334 1
12 | 1.347183 13.175500 0
13 | 1.176813 3.167020 1
14 | -1.781871 9.097953 0
15 | -0.566606 5.749003 1
16 | 0.931635 1.589505 1
17 | -0.024205 6.151823 1
18 | -0.036453 2.690988 1
19 | -0.196949 0.444165 1
20 | 1.014459 5.754399 1
21 | 1.985298 3.230619 1
22 | -1.693453 -0.557540 1
23 | -0.576525 11.778922 0
24 | -0.346811 -1.678730 1
25 | -2.124484 2.672471 1
26 | 1.217916 9.597015 0
27 | -0.733928 9.098687 0
28 | -3.642001 -1.618087 1
29 | 0.315985 3.523953 1
30 | 1.416614 9.619232 0
31 | -0.386323 3.989286 1
32 | 0.556921 8.294984 1
33 | 1.224863 11.587360 0
34 | -1.347803 -2.406051 1
35 | 1.196604 4.951851 1
36 | 0.275221 9.543647 0
37 | 0.470575 9.332488 0
38 | -1.889567 9.542662 0
39 | -1.527893 12.150579 0
40 | -1.185247 11.309318 0
41 | -0.445678 3.297303 1
42 | 1.042222 6.105155 1
43 | -0.618787 10.320986 0
44 | 1.152083 0.548467 1
45 | 0.828534 2.676045 1
46 | -1.237728 10.549033 0
47 | -0.683565 -2.166125 1
48 | 0.229456 5.921938 1
49 | -0.959885 11.555336 0
50 | 0.492911 10.993324 0
51 | 0.184992 8.721488 0
52 | -0.355715 10.325976 0
53 | -0.397822 8.058397 0
54 | 0.824839 13.730343 0
55 | 1.507278 5.027866 1
56 | 0.099671 6.835839 1
57 | -0.344008 10.717485 0
58 | 1.785928 7.718645 1
59 | -0.918801 11.560217 0
60 | -0.364009 4.747300 1
61 | -0.841722 4.119083 1
62 | 0.490426 1.960539 1
63 | -0.007194 9.075792 0
64 | 0.356107 12.447863 0
65 | 0.342578 12.281162 0
66 | -0.810823 -1.466018 1
67 | 2.530777 6.476801 1
68 | 1.296683 11.607559 0
69 | 0.475487 12.040035 0
70 | -0.783277 11.009725 0
71 | 0.074798 11.023650 0
72 | -1.337472 0.468339 1
73 | -0.102781 13.763651 0
74 | -0.147324 2.874846 1
75 | 0.518389 9.887035 0
76 | 1.015399 7.571882 0
77 | -1.658086 -0.027255 1
78 | 1.319944 2.171228 1
79 | 2.056216 5.019981 1
80 | -0.851633 4.375691 1
81 | -1.510047 6.061992 0
82 | -1.076637 -3.181888 1
83 | 1.821096 10.283990 0
84 | 3.010150 8.401766 1
85 | -1.099458 1.688274 1
86 | -0.834872 -1.733869 1
87 | -0.846637 3.849075 1
88 | 1.400102 12.628781 0
89 | 1.752842 5.468166 1
90 | 0.078557 0.059736 1
91 | 0.089392 -0.715300 1
92 | 1.825662 12.693808 0
93 | 0.197445 9.744638 0
94 | 0.126117 0.922311 1
95 | -0.679797 1.220530 1
96 | 0.677983 2.556666 1
97 | 0.761349 10.693862 0
98 | -2.168791 0.143632 1
99 | 1.388610 9.341997 0
100 | 0.317029 14.739025 0
--------------------------------------------------------------------------------
/python_utils/machine_learn/logistic_regression/lr_scratch.py:
--------------------------------------------------------------------------------
1 | # coding:utf-8
2 |
3 | import numpy as np
4 | import pandas as pd
5 |
6 |
7 | class LogisticRegression(object):
8 | def __init__(self):
9 | self._map_method()
10 | pass
11 |
12 | def _map_method(self):
13 | self._do_train = {"gd": self._gd, "sgd": self._sgd}
14 |
15 | def _sigmoid(self, x):
16 | return 1.0 / (1 + np.exp(-x))
17 |
18 | def fit(self, X, Y, **opt):
19 | m, n = X.shape
20 | self._weight = np.ones((n, 1))
21 | max_iter = opt.get("max_iter", 100)
22 | alpha = opt.get("alpha", 0.01)
23 | method = opt.get("method", "sgd")
24 |
25 | for k in xrange(max_iter):
26 | try:
27 | self._do_train[method](X, Y, alpha)
28 |
29 | print "iter %s error rate %s" % (k, self._get_error_rate(X, Y))
30 | except KeyError:
31 | raise ValueError('method error')
32 |
33 | def _sgd(self, X, Y, alpha):
34 | """stochastic gradient descent"""
35 | m, n = X.shape
36 | for i in xrange(m):
37 | # pred = self._sigmoid(X[i, :] * self._weight)
38 | pred = self._sigmoid(np.dot(X[i, :], self._weight))
39 | error = Y[i] - pred
40 | self._weight = self._weight + alpha * np.matrix(X[i, :]).T * error
41 |
42 | def _gd(self, X, Y, alpha):
43 | """gradient descent"""
44 | pred = self._sigmoid(X * self._weight)
45 | error = Y - pred
46 | self._weight = self._weight + alpha * X.T * error
47 |
48 | def _get_error_rate(self, X, Y):
49 | all_num = len(Y)
50 | error_num = 0
51 | for i in xrange(all_num):
52 | pred = self._sigmoid(np.dot(X[i, :], self._weight)) > 0.5
53 | if pred != bool(Y[i]):
54 | error_num += 1
55 |
56 | return error_num * 1.0 / all_num
57 |
58 |
def get_data():
    """Load the whitespace-delimited training file, prepend a constant bias
    column f0=1, and return (feature ndarray, label ndarray)."""
    path = '../dataset/logistic_regression/lr_ml_action.txt'

    data = pd.read_csv(path, delim_whitespace=True,
                       names=['f1', 'f2', 'label'],
                       dtype={'A': np.float64, 'B': np.float64, 'C': np.int64})

    # add bias w0
    data['f0'] = 1
    print data.head()
    features = ['f0', 'f1', 'f2']
    return data[features].values, data.label.values
71 |
72 |
def test_lr():
    """Smoke-test: print sigmoid of a small array, then fit on the dataset."""
    X, Y = get_data()

    lr = LogisticRegression()
    arr = np.array([-1, 0, 1])
    print lr._sigmoid(arr)
    lr.fit(X, Y)
80 |
81 |
if __name__ == '__main__':
    # run the scratch logistic-regression demo
    # get_data()
    test_lr()
    pass
86 |
--------------------------------------------------------------------------------
/python_utils/py_basic/functional_program.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 |
3 | from functools import partial
4 |
5 |
def inc(x):
    """Return a closure that adds the captured `x` to its single argument."""
    return lambda y: x + y
10 |
11 |
def test_inc():
    """Show that each call to inc() captures its own independent `x`."""
    inc2 = inc(2)
    inc5 = inc(5)

    print inc2(5)  # 7
    print inc5(5)  # 10
18 |
19 |
def to_upper(item):
    """Return `item` upper-cased; a tiny named helper for use with map()."""
    return item.upper()
22 |
23 |
def map_demo():
    """Demo of map(): built-in callables, named functions, the equivalent
    manual loop, lambdas, and multi-iterable mapping."""
    name_lst = ['xy', 'bear fish', 'jay']
    name_len = map(len, name_lst)
    print name_len
    # [2, 9, 3]

    name_upper = map(to_upper, name_lst)
    print name_upper
    # ['XY', 'BEAR FISH', 'JAY']

    # same result written as an explicit index loop
    name_up = []
    for i in range(len(name_lst)):
        name_up.append(name_lst[i].upper())
    print name_up
    # ['XY', 'BEAR FISH', 'JAY']

    squares = map(lambda x: x * x, range(4))
    print squares
    # [0, 1, 4, 9]

    # map over several iterables in lockstep (py2 map returns a list)
    a = [1, 2, 3, 4]
    b = [17, 12, 11, 10]
    c = [-1, -4, 5, 9]
    print map(lambda x, y: x + y, a, b)
    # [18, 14, 14, 14]
    print map(lambda x, y, z: x + y + z, a, b, c)
    # [17, 10, 19, 23]
51 |
52 |
def reduce_demo():
    """Demo of reduce(): fold a list into its sum (py2 builtin reduce)."""
    lst = range(1, 6)
    print lst  # [1, 2, 3, 4, 5]
    sum = reduce(lambda x, y: x + y, lst)
    print sum  # 15
58 |
59 |
def cal_aver():
    """Imperative baseline for filter_demo: average the positive numbers
    with an explicit loop. Relies on py2 integer division (55/10 == 5)."""
    lst = range(0, 11)
    positive_num_cnt = 0
    positive_num_sum = 0
    for i in range(len(lst)):
        if lst[i] > 0:
            positive_num_cnt += 1
            positive_num_sum += lst[i]

    average = 0
    if positive_num_cnt > 0:
        average = positive_num_sum / positive_num_cnt

    print average  # 5
74 |
75 |
def filter_demo():
    """Functional version of cal_aver: filter the odd numbers, then reduce
    to their sum and divide (py2 integer division)."""
    lst = range(0, 11)
    odd_lst = filter(lambda x: x % 2, lst)
    print odd_lst  # [1, 3, 5, 7, 9]
    average = reduce(lambda x, y: x + y, odd_lst) / len(odd_lst)
    print average  # 5
82 |
83 |
def f(a, b, c, d):
    """
    used by function partial_demo as demo: packs four digits into one
    number (a -> thousands, b -> hundreds, c -> tens, d -> units)
    :param a: int
    :param b: int
    :param c: int
    :param d: int
    :return:
    """
    weights = (1000, 100, 10, 1)
    return sum(digit * w for digit, w in zip((a, b, c, d), weights))
94 |
95 |
def partial_demo():
    """Demo of functools.partial: pre-bind positional and keyword args of f."""
    # A partial function that calls f with
    # a as 3, b as 1 and c as 4.
    g = partial(f, 3, 1, 4)
    print g(5)  # 3145

    # keyword binding: only `a` remains to be supplied
    g2 = partial(f, d=4, c=3, b=2)
    print g2(1)  # 1234
104 |
105 |
if __name__ == '__main__':
    # run one functional-programming demo at a time
    partial_demo()
    # test_inc()
    # map_demo()
    # reduce_demo()
    # filter_demo()
    # cal_aver()
    pass
114 |
--------------------------------------------------------------------------------
/python_utils/py_basic/kwargs_xargs.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 |
3 |
def print_keyword_args(**kwargs):
    """Print each keyword argument as 'name = value', one per line."""
    # kwargs is a dict of the keyword args passed to the function
    for key, value in kwargs.iteritems():
        print "%s = %s" % (key, value)
8 |
9 |
def test_print_keyword_args():
    """Show calling with explicit keywords vs unpacking a dict with **."""
    print_keyword_args(first_name="John", last_name="Doe")
    # first_name = John
    # last_name = Doe

    dic_args = {'first_name': 'Bobby', 'last_name': 'Smith'}
    # passing the dict positionally does NOT work:
    # print_keyword_args(dic_args)
    # TypeError: print_keyword_args() takes exactly 0 arguments (1 given)
    print_keyword_args(**dic_args)
    # first_name = Bobby
    # last_name = Smith
21 |
22 |
def print_everything(*args):
    """Print each positional argument with its index ('i -> value')."""
    for count, thing in enumerate(args):
        print '{0} -> {1}'.format(count, thing)
26 |
27 |
def test_print_everything():
    """Show *args collecting an arbitrary number of positional arguments."""
    print_everything('apple', 'banana', 'cabbage')
    # 0->apple
    # 1->banana
    # 2->cabbage
33 |
34 |
def func(required_arg, *args, **kwargs):
    """Show how one required arg, *args and **kwargs combine in a signature."""
    # required_arg is a positional-only parameter.
    print required_arg

    # args is a tuple of positional arguments,
    # because the parameter name has * prepended.
    if args:  # If args is not empty.
        print args

    # kwargs is a dictionary of keyword arguments,
    # because the parameter name has ** prepended.
    if kwargs:  # If kwargs is not empty.
        print kwargs
48 |
49 |
def test_func():
    """Call func with increasing argument variety; expected output inline."""
    func("required argument")
    # required argument
    func("required argument", 1, 2, '3')
    # required argument
    # (1, 2, '3')
    func("required argument", 1, 2, '3', keyword1=4, keyword2="foo")
    # required argument
    # (1, 2, '3')
    # {'keyword2': 'foo', 'keyword1': 4}
    # calling with no args at all raises:
    # func()
    # TypeError: func() takes at least 1 argument (0 given)
62 |
63 |
64 | # kwargs default value
class ExampleClass:
    def __init__(self, **kwargs):
        """Read **kwargs four ways: popped, required, optional, defaulted."""
        # pop() behaves like get() with a default but also removes the key;
        # done first here -- 'val4' is distinct so the order doesn't matter
        self.val4 = kwargs.pop('val4', 'default_val4')
        # mandatory key: raises KeyError when absent
        self.val = kwargs['val']
        # optional: None when absent
        self.val2 = kwargs.get('val2')
        # optional with an explicit fallback value
        self.val3 = kwargs.get('val3', 'default_val3')
71 |
72 |
def default_kwargs(**kwargs):
    """Merge caller kwargs over a dict of defaults and print the result."""
    options = {
        'option1': 'default_value1',
        'option2': 'default_value2',
        'option3': 'default_value3', }

    # caller-supplied values override the defaults
    options.update(kwargs)
    print options
81 |
82 |
def test_default_kwargs():
    """Call default_kwargs with no overrides, then with two overrides."""
    default_kwargs()
    # {'option2': 'default_value2', 'option3': 'default_value3', 'option1': 'default_value1'}
    default_kwargs(option1='new_value1', option3='new_value3')
    # {'option2': 'default_value2', 'option3': 'new_value3', 'option1': 'new_value1'}
88 |
if __name__ == '__main__':
    # run one *args/**kwargs demo at a time
    # test_print_keyword_args()
    # test_print_everything()
    # test_func()
    test_default_kwargs()
    pass
95 |
--------------------------------------------------------------------------------
/python_utils/py_basic/str_basic.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 |
3 | import string
4 | import random
5 |
6 |
def str_format():
    """Demo of %-interpolation into plain and triple-quoted strings."""
    s1 = """abc%sddd""" % 'asd'
    print s1
    url = """http://127.0.0.1:9325/shortvideo/checkBlack?url=%s&serial=%s""" % ('http%3A%2F%2Fzbasecapture.bs2.yy.com%2F42269159_1499248536403_3.jpg', '77777777')
    print url
12 |
13 |
def generator_random_str(size=6, str_source=string.digits + string.ascii_lowercase):
    """:return size num str(in 'A~z, 0-9')
    eg. size=6 return 'ad14df'
    [random.choice('abcde') for _ in range(3)] -> ['a', 'b', 'b']
    ''.join(['a', 'b', 'b']) -> 'abb'

    FIX: string.ascii_lowercase replaces the Python-2-only (and
    locale-dependent) string.lowercase -- same characters under the default
    locale, but portable; range replaces xrange for the same reason.
    """
    return ''.join(random.choice(str_source) for _ in range(size))
21 |
22 |
def str_split():
    """Demo: take the prefix before the first '&', and split on '&'."""
    s = 'python_worker_name&topSid_111_appid_111&topSid_222_appid_222'
    print s[0:s.find('&')]
    print s.split('&')
    print s.split('&')[1:]
28 |
29 |
def remove_sub_str():
    """Demo: locate a substring with find() and strip it with replace()."""
    src = 'channel_1'
    sub_s = 'chan'
    print src[src.find(sub_s):]
    print src.find(sub_s)
    print src.replace(sub_s, '')
36 |
37 |
def str_format_once():
    """Fill in only the {tb_name} placeholder with str.format, leaving the
    %s placeholders intact for the DB driver to bind later."""
    query = """insert into {tb_name} (create_time, appid) VALUES (%s,%s)"""
    tb_name = 'tb_audio_rec_ret_2017_11'
    # query % tb_name  error  -- %-formatting would choke on the %s pair
    print query.format(tb_name=tb_name)
    # insert into tb_audio_rec_ret_2017_11 (create_time, appid) VALUES (%s,%s)
44 |
45 |
def str_replace():
    """Demo: turn the 'YYYY-MM' prefix of a timestamp into 'YYYY_MM'."""
    import time
    s = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    print s
    # 2017-11-09 17:26:34
    print s[0:7].replace('-', '_')
    # 2017_11
53 |
54 |
def char_2int_2char():
    """Demo: ord() maps a char to its code point, chr() maps it back."""
    print ord('a')
    # 97
    print chr(97)
    # a
60 |
61 |
def fill_zero():
    """Demo: zfill pads a number string with leading zeros to width 5.

    NOTE(review): this prints but returns None, so `print fill_zero()`
    in the main guard also prints 'None'.
    """
    s_num = 11
    print str(s_num).zfill(5)
    # 00011
66 |
67 |
if __name__ == '__main__':
    # scratchpad: run one string demo at a time, leftovers kept commented
    print fill_zero()
    # char_2int_2char()
    # str_replace()
    # str_format_once()
    # remove_sub_str()
    # str_split()
    # str_format()
    # print generator_random_str()
    # print generator_random_str(3, 'abc123')
    # s = '123'
    # if s.find("12") == -1:
    #     print 'no no '

    b = 0
    b = None
    # if b is not zero not None(like -1, 1) it will print
    if b:
        print '%s not zero' % b

    # url = 'bear fish.com'
    # if url.endswith('.com'):
    #     url = url[:-4]
    # print url

    url = 'www.myzaker.com/article/58daf1b69490cbe53400001b/'
    # if 'aa' in url:
    #     print '1'
    # elif 'comp' in url:
    #     print '2'
    # else:
    #     print '3'
    # print url.find('myzaker')
    # print url.find('www.myzaker')
    # print url.find('http')

    # print s[1:]
    # print s[:]
    # print s[:2]
    pass
108 |
--------------------------------------------------------------------------------
/python_utils/py_basic/set_ope.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | """
3 | python unique list -> set usage
4 | """
5 |
6 |
def define_set():
    """2 ways of defining set, plus the binary set operators."""
    set_1 = set([1, 2, 3])
    print type(set_1)
    print set_1

    # set literal syntax; duplicates collapse
    set_2 = {2, 3, 2}
    print type(set_2)
    #
    print set_2
    # set([2, 3])

    a = set((1, 2, 3, 4))
    b = set([3, 4, 5, 6])
    print a | b  # Union
    # {1, 2, 3, 4, 5, 6}
    print a & b  # Intersection
    # {3, 4}
    print a < b  # Subset
    # False
    print a - b  # Difference
    # {1, 2}
    print a ^ b  # Symmetric Difference
    # {1, 2, 5, 6}
31 |
32 |
def set_basic_usage():
    """add / membership-test / remove on a set; duplicate adds are no-ops."""
    items = set()

    items.add('abc')
    items.add('abc')  # second add of the same value is ignored
    items.add(123)
    items.add(777)
    print(items)

    if 123 in items:
        print(' find it and remove it')
        items.remove(123)
    print(items)
46 |
47 |
def dict_val_set():
    """Dict whose values are sets; then test membership in each value set."""
    mapping = {}
    mapping['abc'] = set([123])
    mapping['abc'].add(456)
    mapping['abc'].add(123)  # already present: no-op
    print(mapping)
    # {'abc': set([456, 123])}
    mapping['ddd'] = set()
    mapping['ddd'].add(123)

    for key in mapping.keys():
        if 123 in mapping[key]:
            print(mapping[key])
61 |
62 |
def set_remove():
    """Demo of removing elements from a set.

    set.remove() takes exactly one argument, so unpacking a 3-element list
    into it raises TypeError (caught and printed below).

    Fix: the original ended with ``s_src - s2`` where s2 is a *list* — the
    ``-`` operator requires a set on both sides and raised TypeError.
    ``set.difference()`` accepts any iterable (equivalently: s_src - set(s2)).
    """
    s_src = {1}
    lst_2 = [1, 3, 2]
    try:
        # TypeError: remove() takes exactly one argument (3 given)
        s_src.remove(*lst_2)
    except Exception as e:
        print(e)
    # bulk difference against a plain list
    print(s_src.difference(lst_2))
81 |
82 |
def set_lst():
    """Convert a set to a list by extending an empty list with it."""
    src_set = {1, 2, 3}
    as_list = []
    # += on a list accepts any iterable, including a set
    as_list += src_set

    print(src_set)
    print(as_list)
91 |
92 |
def dict_key_to_set():
    """Turn dict keys into a set and intersect it with another set."""
    d = {'111': 1, 'aaa': 111}
    key_set = set(d.keys())
    print(key_set)
    # set(['111', 'aaa'])

    # note: the string '111' and the int 111 are distinct elements
    mixed = {'111', 111}
    print(key_set & mixed)
101 |
102 |
def set_diff():
    """`-` and .difference() are equivalent spellings of set difference."""
    left = {1, 3}
    right = {1, 2, 4}

    print(left - right)
    print(left.difference(right))
    # both print the set containing only 3
110 |
111 |
def set_hash():
    """Building a set from a list drops duplicates; element order is arbitrary."""
    values = [1, 555, 372, 6, 6, 372, 222]
    dedup = set(values)
    print(dedup)  # unordered
117 |
if __name__ == '__main__':
    # run one demo at a time; the rest are kept for manual experiments
    set_hash()
    # set_diff()
    # dict_key_to_set()
    # set_lst()
    # set_remove()
    # define_set()
    # dict_val_set()
    # set_basic_usage()
    # print min(3, 4, -1)
    # import time
    # import random
    # timestamp = int(time.time())
    # print random.randint(0, 1000000) + timestamp
    pass
133 |
--------------------------------------------------------------------------------
/python_utils/al_lt_common/al_str.py:
--------------------------------------------------------------------------------
1 | # coding:utf-8
2 |
3 |
def max_unique_substr(src):
    """Return every longest substring of src with no repeated character.

    Maintains a sliding duplicate-free window and collects all windows
    tied for the maximum length seen so far.
    """
    window = ''    # current duplicate-free window ending at ch
    results = []   # all substrings tied for the max length
    for ch in src:
        if ch in window:
            # drop everything up to and including the previous occurrence
            window = window[window.index(ch) + 1:] + ch
            if results and len(results[0]) == len(window) and window not in results:
                results.append(window)
        else:
            window += ch
            if not results:
                results.append(window)
            elif len(results[0]) < len(window):
                # new strictly-longer champion: reset the tie list
                results = [window]

    return results
27 |
28 |
def max_unique_substr_len(src):
    """Return the length of the longest substring of src with distinct chars.

    Sliding window [b, e] plus a 256-entry last-seen-index table.
    Assumes every character satisfies ord(ch) < 256 — TODO confirm callers
    never pass text outside Latin-1.
    """
    char_last_idx = [-1] * 256  # last index where each byte value occurred
    b, e, cur_max, max_len = -1, -1, 0, 0

    for i in xrange(len(src)):
        char_idx = ord(src[i])
        last_idx = char_last_idx[char_idx]
        # unseen, or last occurrence lies outside the current window
        if last_idx == -1 or last_idx > e or last_idx < b:
            char_last_idx[char_idx] = i
            e += 1
            cur_max += 1
            if cur_max > max_len:
                max_len = cur_max
        else:
            # repeat inside the window: restart just past the old occurrence
            e = i
            b = last_idx + 1
            cur_max = e - b + 1
            char_last_idx[char_idx] = i

    return max_len
49 |
50 |
def test_max_unique_substr():
    """Smoke-check both longest-unique-substring helpers on sample strings.

    Expected substr results: 'abdefgabef' -> ['abdefg', 'bdefga', 'defgab'],
    'bbbb' -> ['b'], 'geeksforgeeks' -> ['eksforg', 'ksforge'].
    """
    for sample in ('abdefgabef', 'bbbb', 'geeksforgeeks', 'qwertqwer'):
        print(max_unique_substr(sample))

    for sample in ('abdefgabef', 'abcd', 'bbbb', 'geeksforgeeks', 'qwertqwer'):
        print(max_unique_substr_len(sample))
78 |
79 |
def print_lst_str(lst_s):
    """Print a list of characters joined into one string."""
    print(''.join(lst_s))
82 |
83 |
def str_permute(lst_s, b, e):
    """Print every permutation of lst_s[b..e] via recursive in-place swaps.

    Positions before b are fixed; each recursion level places one candidate
    at position b, recurses, then swaps back to restore the list.
    Repeated characters produce duplicate lines.
    """
    if b == e:
        print_lst_str(lst_s)

    for i in xrange(b, e + 1):
        # choose lst_s[i] for position b, permute the tail, then undo
        lst_s[b], lst_s[i] = lst_s[i], lst_s[b]
        # str_permute(lst_s, i + 1, e)
        str_permute(lst_s, b + 1, e)
        lst_s[i], lst_s[b] = lst_s[b], lst_s[i]
93 |
94 |
def print_permutation_str(s):
    """Print every permutation of the string s, one per line.

    Fix: the parameter was named ``str``, shadowing the builtin; renamed to
    ``s`` (existing callers pass it positionally).
    """
    n, lst_s = len(s), list(s)
    str_permute(lst_s, 0, n - 1)
98 |
99 |
def test_pps():
    """Print all permutations of 'abc', then of 'abcd'."""
    for sample in ('abc', 'abcd'):
        print_permutation_str(sample)
105 |
106 |
if __name__ == '__main__':
    # test_pps()
    # test_max_unique_substr()

    # list() splits a string into its characters
    print list('abc')
    # ['a', 'b', 'c']
    pass
114 |
--------------------------------------------------------------------------------
/python_utils/machine_learn/dataset/logistic_regression/lr_ml_action.txt:
--------------------------------------------------------------------------------
1 | -0.017612 14.053064 0
2 | -1.395634 4.662541 1
3 | -0.752157 6.538620 0
4 | -1.322371 7.152853 0
5 | 0.423363 11.054677 0
6 | 0.406704 7.067335 1
7 | 0.667394 12.741452 0
8 | -2.460150 6.866805 1
9 | 0.569411 9.548755 0
10 | -0.026632 10.427743 0
11 | 0.850433 6.920334 1
12 | 1.347183 13.175500 0
13 | 1.176813 3.167020 1
14 | -1.781871 9.097953 0
15 | -0.566606 5.749003 1
16 | 0.931635 1.589505 1
17 | -0.024205 6.151823 1
18 | -0.036453 2.690988 1
19 | -0.196949 0.444165 1
20 | 1.014459 5.754399 1
21 | 1.985298 3.230619 1
22 | -1.693453 -0.557540 1
23 | -0.576525 11.778922 0
24 | -0.346811 -1.678730 1
25 | -2.124484 2.672471 1
26 | 1.217916 9.597015 0
27 | -0.733928 9.098687 0
28 | -3.642001 -1.618087 1
29 | 0.315985 3.523953 1
30 | 1.416614 9.619232 0
31 | -0.386323 3.989286 1
32 | 0.556921 8.294984 1
33 | 1.224863 11.587360 0
34 | -1.347803 -2.406051 1
35 | 1.196604 4.951851 1
36 | 0.275221 9.543647 0
37 | 0.470575 9.332488 0
38 | -1.889567 9.542662 0
39 | -1.527893 12.150579 0
40 | -1.185247 11.309318 0
41 | -0.445678 3.297303 1
42 | 1.042222 6.105155 1
43 | -0.618787 10.320986 0
44 | 1.152083 0.548467 1
45 | 0.828534 2.676045 1
46 | -1.237728 10.549033 0
47 | -0.683565 -2.166125 1
48 | 0.229456 5.921938 1
49 | -0.959885 11.555336 0
50 | 0.492911 10.993324 0
51 | 0.184992 8.721488 0
52 | -0.355715 10.325976 0
53 | -0.397822 8.058397 0
54 | 0.824839 13.730343 0
55 | 1.507278 5.027866 1
56 | 0.099671 6.835839 1
57 | -0.344008 10.717485 0
58 | 1.785928 7.718645 1
59 | -0.918801 11.560217 0
60 | -0.364009 4.747300 1
61 | -0.841722 4.119083 1
62 | 0.490426 1.960539 1
63 | -0.007194 9.075792 0
64 | 0.356107 12.447863 0
65 | 0.342578 12.281162 0
66 | -0.810823 -1.466018 1
67 | 2.530777 6.476801 1
68 | 1.296683 11.607559 0
69 | 0.475487 12.040035 0
70 | -0.783277 11.009725 0
71 | 0.074798 11.023650 0
72 | -1.337472 0.468339 1
73 | -0.102781 13.763651 0
74 | -0.147324 2.874846 1
75 | 0.518389 9.887035 0
76 | 1.015399 7.571882 0
77 | -1.658086 -0.027255 1
78 | 1.319944 2.171228 1
79 | 2.056216 5.019981 1
80 | -0.851633 4.375691 1
81 | -1.510047 6.061992 0
82 | -1.076637 -3.181888 1
83 | 1.821096 10.283990 0
84 | 3.010150 8.401766 1
85 | -1.099458 1.688274 1
86 | -0.834872 -1.733869 1
87 | -0.846637 3.849075 1
88 | 1.400102 12.628781 0
89 | 1.752842 5.468166 1
90 | 0.078557 0.059736 1
91 | 0.089392 -0.715300 1
92 | 1.825662 12.693808 0
93 | 0.197445 9.744638 0
94 | 0.126117 0.922311 1
95 | -0.679797 1.220530 1
96 | 0.677983 2.556666 1
97 | 0.761349 10.693862 0
98 | -2.168791 0.143632 1
99 | 1.388610 9.341997 0
100 | 0.317029 14.739025 0
--------------------------------------------------------------------------------
/python_utils/sk_sc_pd_operator/pd_date_time.py:
--------------------------------------------------------------------------------
1 | # coding:utf-8
2 |
3 | import numpy as np
4 | import pandas as pd
5 | import matplotlib.pyplot as plt
6 | import seaborn
7 | seaborn.set()
8 |
9 |
def np_dt():
    """numpy datetime64 arithmetic: adding an int range steps by days."""
    start = np.array('2017-09-28', dtype=np.datetime64)
    print(start + np.arange(5))
    # five consecutive days: 2017-09-28 .. 2017-10-02
14 |
15 |
def series_dt():
    """Series with a DatetimeIndex: range slicing and year-based selection."""
    when = pd.DatetimeIndex(['2014-07-04', '2014-08-04',
                             '2015-07-04', '2015-08-04'])
    series = pd.Series([0, 1, 2, 3], index=when)
    print(series)
    # four rows, dtype int64

    # slicing by date strings keeps both endpoints
    print(series['2014-07-04':'2014-09-04'])
    # first two rows

    # a bare year selects every timestamp within that year
    print(series['2015'])
    # last two rows
36 |
37 |
def pd_time():
    """Parse heterogeneous date strings, then build date/period/hour ranges."""
    from datetime import datetime
    dates = pd.to_datetime([datetime(2015, 7, 3), '4th of July, 2015',
                            '2015-Jul-6', '07-07-2015', '20150708'])

    print(dates)

    # day-resolution PeriodIndex
    print(dates.to_period('D'))
    # subtracting a scalar timestamp yields a TimedeltaIndex
    print(dates - dates[0])

    # explicit endpoints vs. a start plus a count
    print(pd.date_range('2015-07-03', '2015-07-5'))
    print(pd.date_range('2015-07-03', periods=3))
    # both give three consecutive days

    # hourly frequency
    print(pd.date_range('2015-07-03', periods=3, freq='H'))

    # monthly periods
    print(pd.period_range('2015-07', periods=3, freq='M'))
68 |
69 |
def pd_time_offset():
    """Composite frequency strings and business-day offsets for ranges."""
    from pandas.tseries.offsets import BDay

    # '2H30T' = 2 hours + 30 minutes = a 150-minute step
    print(pd.timedelta_range(0, periods=3, freq="2H30T"))
    # business days only (skips weekends)
    print(pd.date_range('2015-07-01', periods=3, freq=BDay()))
79 |
80 |
def pandas_datareader_1():
    """Download GOOG daily prices (2004-2016) and plot the closing series.

    NOTE(review): requires network access and the third-party
    pandas_datareader package; the 'google' data source has been
    discontinued upstream — expect this to fail until ported to another
    source.
    """
    from pandas_datareader import data
    goog = data.DataReader('GOOG', start='2004', end='2016',
                           data_source='google')

    print goog.head()

    goog = goog['Close']
    goog.plot()
90 |
91 |
if __name__ == '__main__':
    # run one demo at a time; pandas_datareader_1 needs network access
    pandas_datareader_1()
    # pd_time_offset()
    # pd_time()
    # series_dt()
    # np_dt()
    pass
99 |
--------------------------------------------------------------------------------
/python_utils/py_basic/time_ope.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 |
3 | """
4 | get the seconds since epoch from the time + date output
5 | function time consume
6 | https://stackoverflow.com/questions/5478351/python-time-measure-function
7 | """
8 |
9 | import time
10 | import timeit
11 |
12 |
def test_time_consume():
    """Measure a one-second sleep with a wall-clock delta and print it.

    Fix: time.clock() was deprecated since Python 3.3 and removed in 3.8,
    and measured CPU time (not wall time) on some platforms; time.time()
    gives wall-clock time everywhere, on both Python 2 and 3.
    """
    start = time.time()
    time.sleep(1)
    print(time.time() - start)
    # ~1.0
18 |
19 |
def timing(f):
    """Decorator: print f's wall-clock runtime in ms, pass its result through.

    Fix: ``f.func_name`` is Python-2-only; ``f.__name__`` carries the same
    value and works on both Python 2 and 3.
    """
    def wrap(*args):
        start = time.time()
        ret = f(*args)
        end = time.time()
        print('%s function took %0.3f ms' % (f.__name__, (end - start) * 1000.0))
        return ret
    return wrap
28 |
29 |
@timing
def test_time():
    """Sleep 1.1 s so the @timing decorator has something to report."""
    time.sleep(1.1)
    # test_time function took 1101.000 ms
34 |
35 |
def timeit_test():
    """Time 1,000,000 executions of a 'pass' statement and return the seconds.

    Fix: the original called timeit.timeit() and silently discarded the
    measurement; returning it makes the function useful (the previous
    return value was None, so callers ignoring it are unaffected).
    """
    return timeit.timeit()
39 |
def sleep_milliseconds(mi_sec=50):
    """Block the calling thread for mi_sec milliseconds (default 50)."""
    seconds = mi_sec / 1000.0
    time.sleep(seconds)
44 |
@timing
def test_sp_mi_sec():
    """Time the default 50 ms sleep via the @timing decorator."""
    sleep_milliseconds()
49 |
def test_time_transform():
    """Print the current local time formatted as 'Y-m-d H:M:S'.

    With no second argument, time.strftime formats the current local time.
    Earlier experiments with epoch seconds / ctime / divmod were removed;
    see the VCS history if needed.
    """
    now_str = time.strftime("%Y-%m-%d %H:%M:%S")
    print(now_str)
    # e.g. 2018-06-28 08:00:35
72 |
def str_time():
    """Parse a datetime string, then step it forward twice by 30 minutes."""
    import datetime
    d = datetime.datetime.strptime('2018-02-01 0:0:0', "%Y-%m-%d %H:%M:%S")
    print('%s %s' % (d, d.strftime("%Y-%m-%d %H:%M:%S")))
    # 2018-02-01 00:00:00 2018-02-01 00:00:00

    for _ in range(2):
        stepped = d + datetime.timedelta(minutes=30)
        print('%s %s' % (d, stepped))
        d = stepped
    # 2018-02-01 00:00:00 2018-02-01 00:30:00
    # 2018-02-01 00:30:00 2018-02-01 01:00:00
86 |
87 |
if __name__ == '__main__':
    # import datetime
    # str_time = '2018-03-08T08:00:00.000'
    # d = datetime.datetime.strptime(str_time, "%Y-%m-%dT%H:%M:%S.%f")
    # print d
    # 2018-03-08 08:00:00

    # NOTE(review): `import time` and the str_time assignment are repeated
    # below; this first pair is shadowed and could be deleted
    import time
    str_time = '2018-03-08T08:00:00.000'
    # str_time.replace('T', ' ')

    # d = time.strftime("%Y-%m-%d %H:%M:%S.%f")

    import time

    str_time = '2018-03-08T08:00:00.000'
    # %f parses the milliseconds; the resulting struct_time drops the fraction
    d = time.strptime(str_time, "%Y-%m-%dT%H:%M:%S.%f")

    print d
    # time.struct_time(tm_year=2018, tm_mon=3, tm_mday=8, tm_hour=8, tm_min=0, tm_sec=0, tm_wday=3, tm_yday=67, tm_isdst=-1)

    print time.strftime("%Y-%m-%d %H:%M:%S", d)
    # 2018-03-08 08:00:00

    # str_time()
    # test_time_consume()
    # test_time()
    # test_sp_mi_sec()
    # test_time_transform()
    pass
118 |
--------------------------------------------------------------------------------
/python_utils/http_basic/url_ope.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 |
3 |
4 | import urllib
5 | import requests
6 | import urlparse
7 |
8 |
def url_quote():
    """urllib.quote/unquote round-trip on a URL and on non-ASCII text."""
    raw_url = 'http://bs2-audiorec.oss-cn-shanghai.aliyuncs.com/e98f81fb526061e92a26eda17955c219.pcm'
    escaped = urllib.quote(raw_url)
    print(escaped)
    # reserved chars like ':' are percent-encoded; '/' is kept by default
    print(urllib.unquote(escaped))
    # back to the original URL
    # multibyte UTF-8 and '=' / '&' are all percent-encoded
    print(urllib.quote("河=&源"))
18 |
19 |
def req_with_para():
    """GET http://xy.com with query parameters, as a dict and as ordered pairs.

    NOTE(review): performs live HTTP requests; the prints show the Response
    object's repr, not the body.
    """
    d_para = {"name": "xy", "age": 21}
    print requests.get('http://xy.com', params=d_para)

    # ordered name-value pairs
    d_sorted_para = [("age", 21), ("name", "xy")]
    print requests.get('http://xy.com', params=d_sorted_para)
27 |
28 |
def url_encode_v1():
    """urllib.urlencode: dict -> percent-encoded query string."""
    params = {'eventName': 'myEvent', 'eventDescription': '飞龙在天'}
    print(urllib.urlencode(params))
    # eventName=myEvent&eventDescription=%E9%A3%9E%E9%BE%99%E5%9C%A8%E5%A4%A9
33 |
34 |
def url_encode_v2():
    """Hand-rolled query-string building, with and without percent-encoding."""
    d_para = {"name": "xy熊大", "age": 21}
    # raw values: non-ASCII bytes pass through unescaped
    print('&'.join('%s=%s' % (k, v) for k, v in d_para.items()))
    # quoted values: safe to embed in a URL
    print('&'.join('%s=%s' % (k, urllib.quote(str(v))) for k, v in d_para.items()))

    base_url = 'xy.com/'
    query = '&'.join('%s=%s' % (k, urllib.quote(str(v))) for k, v in d_para.items())
    url = 'http://%s?%s' % (base_url, query)
    print(url)
    # http://xy.com/?age=21&name=xy%E7%86%8A%E5%A4%A7

    print(urllib.unquote(url))
    # http://xy.com/?age=21&name=xy熊大
49 |
50 |
def post_request_tw():
    """Call an audio-recognition HTTP endpoint with an mp4 URL and print the result.

    NOTE(review): the endpoint IPs, secretKey and signed bs2dl token are
    hard-coded and presumably expired — confirm before relying on this, and
    consider moving credentials out of source control.
    """
    mp4Url = 'https://bilinimg.bs2ul-ssl.yy.com/android2222.mp4'
    # the second assignment overrides the first (only this URL is used)
    mp4Url = 'http://bilinaudiop.bs2dl.yy.com/odgud7b58056e79243f6bacb6580ce0506b1_36695268409460340_37155969.mp4?token=sgCAAFyARE0BAM2BQ1oAAAAAfTtDWgAAAAAMsEEkB0NPTlRFWFQJaQB7ImJ1Y2tldCI6ImJpbGluYXVkaW9wIiwiZmlsZW5hbWUiOiJvZGd1ZDdiNTgwNTZlNzkyNDNmNmJhY2I2NTgwY2UwNTA2YjFfMzY2OTUyNjg0MDk0NjAzNDBfMzcxNTU5NjkubXA0In0EQVVUSAMEAAMAAADI_C7ba_qUVQLIXkAWf7r_sF_FnQ'

    d_para = {"mp4Url": mp4Url,
              "secretKey": "XY-bl-audio-rec-text-ret",
              "serial": "17598411"}

    print urllib.urlencode(d_para)

    # rsp = requests.get('http://172.27.49.16:8887/bilin/audiorec/', params=d_para)
    rsp = requests.get('http://61.147.186.82:9997/bilin/audiorec/', params=d_para)
    print rsp.url  # print the final request URL (with encoded query)
    print rsp.content

    # decode a previously captured percent-encoded response for inspection
    s = "sign=052c177ab75dfd53ab6b1cdc25569ef1&text=%E9%83%BD%E6%95%8F%E6%B3%95%E8%BD%AE%E5%8A%9F%E7%BB%83%E4%B9%A0%E8%80%85%E8%B7%B3%E6%A5%BC%E5%89%B2%E8%85%95%E6%8A%95%E6%B2%B3%EF%BC%8C&ts=1513238084&code=0&serial=17598411"
    print urllib.unquote(s)
68 |
69 |
def url_parse():
    """Extract a query parameter's values from a URL via urlparse/parse_qs."""
    url = 'http://foo.appspot.com/abc?def=ghi'
    query = urlparse.urlparse(url).query
    # parse_qs maps each name to a *list* of values
    print(urlparse.parse_qs(query)['def'])
    # ['ghi']
77 |
78 |
if __name__ == '__main__':
    # run one demo at a time; several of these perform live HTTP requests
    url_parse()
    # post_request_tw()
    # url_quote()
    # url_encode_v1()
    # url_encode_v2()
    pass
86 |
--------------------------------------------------------------------------------
/python_utils/numpy_operate/random_arr.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 |
3 | """
4 | This module is some example about random array
5 | """
6 | import numpy as np
7 |
8 |
def generate_random_2d_arr(col, row):
    """
    generate random 2d array from 0~col*row
    :param col: the num of column
    :param row: the num of row
    :return: a row x col ndarray holding a random permutation of
        0..col*row-1, e.g. generate_random_2d_arr(4, 3) ->
        [[ 7 10  5  3]
         [ 4  2  8 11]
         [ 6  9  1  0]]
    """
    total = col * row
    shuffled = np.random.permutation(total)
    return shuffled.reshape(row, col)
21 |
22 |
def random_arr():
    """Print a 2x4 array of uniform floats drawn from [0, 1)."""
    sample = np.random.random(size=(2, 4))
    print(sample)
28 |
29 |
def random_int_arr():
    """Demos of random integer arrays.

    Fix: ``np.random.random_integers`` (inclusive bounds, deprecated since
    NumPy 1.11 and removed in later releases) is replaced by
    ``np.random.randint(low, high + 1, ...)``, which draws from the same
    inclusive range.
    """
    # one draw from 1..5 inclusive (was random_integers(5))
    print(np.random.randint(1, 6))
    # 3x4 draws from 1..12 inclusive
    arr = np.random.randint(1, 13, size=(3, 4))
    print(arr)
    # ten dice rolls, 1..6 inclusive
    d1 = np.random.randint(1, 7, 10)
    print(d1)
    # evenly spaced floats 0.0..5.5 in steps of 0.5
    arr_f = 0.5 * (np.random.randint(1, 13, size=(8, )) - 1)
    print(arr_f)
    # randint's native half-open form: 0..11
    print(np.random.randint(12, size=(3, 4)))
48 |
49 |
def sample_rows():
    """Select random rows of a 2-D array via integer index arrays."""
    mat = np.random.randint(5, size=(5, 3))
    print(mat)

    # explicit list of row indices
    print(mat[[1, 2]])

    # random row indices, then three equivalent indexing spellings
    rows = np.random.randint(5, size=2)
    print(rows)
    print(mat[rows, :])
    print(mat[rows, ])  # trailing comma form is the same selection

    # one-liner: sample 2 rows directly from the array's own length
    print(mat[np.random.randint(mat.shape[0], size=2), :])
76 |
77 |
def choice_arr():
    """
    numpy.random.choice(a, size=None, replace=True, p=None)
    Generates a random sample from a given 1-D array.
    When `a` is an int, it behaves as if a were np.arange(a).
    """
    pool = np.arange(5)
    print(pool)
    # [0 1 2 3 4]
    print(np.random.choice(pool, 2))   # two draws from the array
    print(np.random.choice(5, 2))      # same thing via the int shorthand
93 |
94 |
def ran_seed():
    """Re-seeding the global RNG replays the identical random sequence."""
    seed = 3

    np.random.seed(seed)
    print(np.random.rand(4))   # first draw after seeding
    print(np.random.rand(4))   # second draw differs

    np.random.seed(seed)
    print(np.random.rand(4))   # identical to the first draw above

    # the same holds for integer draws
    np.random.seed(seed)
    print(np.random.randint(5, size=(2, 3)))
    print(np.random.randint(5, size=(2, 3)))
    np.random.seed(seed)
    print(np.random.randint(5, size=(2, 3)))  # replays the first randint draw
121 |
if __name__ == '__main__':
    # run one demo at a time
    ran_seed()
    # choice_arr()
    # print generate_random_2d_arr(4, 3)
    # random_arr()
    # random_int_arr()

    pass
130 |
--------------------------------------------------------------------------------
/python_utils/matplot/basic.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 | """
3 | This file is about `matplotlib`
4 | Mainly cited from http://matplotlib.org/users/pyplot_tutorial.html
5 | """
6 |
7 | import matplotlib.pyplot as plt
8 | import numpy as np
9 | import random
10 |
11 |
def basic_linear():
    """Plot [1, 2, 3, 4] against their list indices and show the window."""
    y_lst = [1, 2, 3, 4]
    # y_lst = random.sample(range(80000), 500)
    # single-argument plot() uses 0..n-1 as the x values
    plt.plot(y_lst)
    plt.ylabel('y axis value')
    plt.show()
18 |
19 |
def plot_x1_y4():
    """
    Draw a thick red vertical segment from (0, 0) to (0, 4).

    NOTE(review): the name and the old docstring say "a line x = 1", but
    the coordinates ([0, 0], [0, 4]) place the segment on x = 0 — confirm
    which was intended.
    :return: None; blocks on plt.show()
    """
    plt.plot([0, 0], [0, 4], color='red', linewidth=3.0)
    # fix the view to x in [-1, 1], y in [-4, 4]
    plt.axis([-1, 1, -4, 4])
    plt.show()
28 |
29 |
def basic_curve():
    """Print samples of a cubic, then plot a squares curve recolored red."""
    x = np.linspace(0, 2, 11)
    print x
    y = x ** 3 - 5 * x ** 2 + 6 * x + 1
    print y
    # NOTE(review): x and y are printed but never plotted; the commented
    # plt.plot(x, y) calls suggest they once were
    # plt.plot(x, y, 'r-')
    # plt.plot(x, y)
    lines = plt.plot([1, 2, 3, 4], [1, 4, 9, 16])
    # setp applies properties to the returned Line2D objects
    plt.setp(lines, color='r')
    plt.show()
    # plt.axis([0, 100, 0, 100])
41 |
42 |
def multi_curve():
    """Print sample points 0.0..4.8; the three-curve plot itself is
    commented out, so nothing is drawn."""
    t = np.arange(0., 5., 0.2)
    print t
    # plt.plot(t, t, 'r-', t, t**2, 'bs', t, t**3, 'g^')
    # plt.show()
48 |
49 |
def f(t):
    """Damped cosine exp(-t) * cos(2*pi*t); accepts scalars or numpy arrays."""
    decay = np.exp(-t)
    return decay * np.cos(2 * np.pi * t)
52 |
53 |
def multi_figure():
    """Juggle two figures, then revisit a subplot in figure 1 to title it."""
    plt.figure(1)                # the first figure
    plt.subplot(211)             # the first subplot in the first figure
    plt.plot([1, 2, 3])
    plt.subplot(212)             # the second subplot in the first figure
    plt.plot([4, 5, 6, 7, 11])

    plt.figure(2)                # a second figure
    plt.plot([4, 5, 6])          # creates a subplot(111) by default

    plt.figure(1)                # figure 1 current; subplot(212) still current
    plt.subplot(211)             # make subplot(211) in figure1 current
    plt.title('Easy as 1, 2, 3') # subplot 211 title

    plt.show()
69 |
70 |
def multi_figure_two():
    """Two stacked subplots: the damped cosine f(t) on a coarse grid (black
    line) and a plain cosine sampled finely (blue dots)."""
    t1 = np.arange(0., 5, 0.1)
    t2 = np.arange(0., 5, 0.02)

    plt.figure(1)
    plt.subplot(211)
    plt.plot(t1, f(t1), 'k')

    plt.subplot(212)
    plt.plot(t2, np.cos(2 * np.pi * t2), 'bo')

    plt.show()
83 |
84 |
def histogram():
    """Overlaid histogram of three 1000-sample standard-normal draws.

    Fix: the local variable was named ``bin``, shadowing the builtin
    ``bin()``; renamed to ``num_bins``.
    """
    x_mul = [np.random.randn(n) for n in [1000, 1000, 1000]]
    print(x_mul)
    num_bins = 10
    plt.hist(x_mul, num_bins)
    plt.show()
91 |
92 |
def histogram_two():
    """Histogram three samples (60/50/70 values) drawn without replacement
    from 0..99.

    Fix: the local variable was named ``bin``, shadowing the builtin
    ``bin()``; renamed to ``num_bins``.
    """
    x_mul = [random.sample(range(0, 100), n) for n in [60, 50, 70]]
    print(x_mul[0])
    print(x_mul[1])
    print(x_mul[2])
    num_bins = 10
    plt.hist(x_mul, num_bins)
    plt.show()
101 |
102 |
def plot_2d():
    """Line plot of seven (x, y) points; swap in scatter() for points only."""
    x = [1, 2, 3, 4, 5, 6, 7]
    y = [2.6, 3.6, 8.3, 56, 12.7, 8.9, 5.3]
    plt.plot(x, y)  # plot line
    # plt.scatter(x, y)  # plot scatter
    plt.show()
109 |
110 |
def plot_orthogonal():
    """Draw the two column vectors of a 2x2 matrix as segments to the origin.

    NOTE(review): the hard-coded columns look orthonormal (rotation-like
    matrix); the segments only *appear* perpendicular on screen when the
    axes are equally scaled — see the commented plt.axis calls.
    """
    arr = np.array([[-0.85389096, -0.52045195], [0.52045195, -0.85389096]])
    # arr = np.array([[1, -1], [1, 1]])
    # each pair is [component, 0]: a segment from the column vector to origin
    v1_x, v2_x = [arr[:, 0][0], 0], [arr[:, 1][0], 0]
    v1_y, v2_y = [arr[:, 0][1], 0], [arr[:, 1][1], 0]
    plt.plot(v1_x, v1_y)
    plt.plot(v2_x, v2_y)
    # plt.axis([-1, 1, -1, 1])
    # set the below bound, or the line won't seem orthogonal
    # plt.axis([-0.85389096, 0.52045195, -0.85389096, 0.52045195])
    plt.show()
122 |
123 |
if __name__ == '__main__':
    # run one demo at a time; each opens a blocking matplotlib window
    plot_orthogonal()
    # plot_2d()
    # basic_linear()
    # basic_curve()
    # multi_curve()
    # multi_figure()
    # multi_figure_two()
    # histogram()
    # histogram_two()
    pass
135 |
--------------------------------------------------------------------------------
/python_utils/machine_learn/neural_network_keras/nn_keras_digits.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 | """
3 | This file is mainly use keras to recognize digits,with
4 | Multi-Layer perceptrons or neural network
5 | """
6 | from keras.datasets import mnist
7 | import gzip
8 | import matplotlib.pyplot as plt
9 | import sys
10 | # from six.moves import cPickle
11 | import cPickle
12 | import numpy as np
13 | from keras.models import Sequential
14 | from keras.layers import Dense, Activation
15 | from keras.layers import Dropout
16 | from keras.utils import np_utils
17 |
18 |
def load_data():
    """
    Load the pickled MNIST dataset from ../dataset/mnist/mnist.pkl.gz.

    :return: the tuple structure stored in the pickle.
        NOTE(review): show_image unpacks it as three (X, y) splits while
        generate_data unpacks two — confirm the pickle's actual layout.
    """
    file = '../dataset/mnist/mnist.pkl.gz'
    f = gzip.open(file, 'rb')
    if sys.version_info < (3,):
        data = cPickle.load(f)
    else:
        # Python 3 needs an explicit encoding to read a Python-2 pickle
        data = cPickle.load(f, encoding='bytes')
    f.close()
    return data
33 |
def show_image():
    """
    Sanity check: display the first four training images in a 2x2 grid.
    Assumes X_train entries are 2-D (28x28) arrays, not pre-flattened
    vectors — TODO confirm against the pickle format.
    :return: None; blocks on plt.show()
    """
    # the pickle is unpacked as (train, validation, test) splits here
    (X_train, y_train), (X_validation, y_validation), (X_test, y_test) = load_data()
    # (X_train, y_train), (X_test, y_test) = mnist.load_data()
    # plot 4 images as gray scale
    plt.subplot(221)
    plt.imshow(X_train[0], cmap=plt.get_cmap('gray'))
    plt.subplot(222)
    plt.imshow(X_train[1], cmap=plt.get_cmap('gray'))
    plt.subplot(223)
    plt.imshow(X_train[2], cmap=plt.get_cmap('gray'))
    plt.subplot(224)
    plt.imshow(X_train[3], cmap=plt.get_cmap('gray'))
    # show the plot
    plt.show()
53 |
def generate_data():
    """Flatten, scale and one-hot encode MNIST; return train and test arrays.

    NOTE(review): load_data() appears to yield *three* (X, y) splits (see
    show_image), but this unpacks only two — confirm before relying on it.
    :return: (X_train, y_train, X_test, y_test)
    """
    (X_train, y_train), (X_test, y_test) = load_data()

    # flatten 28*28 images to a 784 vector for each image
    print X_train.shape[1], X_train.shape[2], X_train.shape
    # X_train.shape -> (60000L, 28L, 28L)
    num_pixels = X_train.shape[1] * X_train.shape[2]
    X_train = X_train.reshape(X_train.shape[0], num_pixels).astype('float32')
    X_test = X_test.reshape(X_test.shape[0], num_pixels).astype('float32')

    # normalize inputs from 0-255 to 0-1
    X_train = X_train / 255
    X_test = X_test / 255

    # one-hot encode the digit labels
    y_train = np_utils.to_categorical(y_train)
    y_test = np_utils.to_categorical(y_test)
    # print y_train.shape, y_test.shape
    # y_train.shape -> (60000L, 10L), y_test.shape -> (10000L, 10L)
    num_classes = y_test.shape[1]  # NOTE(review): computed but never used

    return X_train, y_train, X_test, y_test
76 |
def baseline_model():
    """
    define baseline model: one 784-unit ReLU hidden layer feeding a 10-way
    softmax, compiled with categorical cross-entropy and the Adam optimizer.
    :return: the compiled keras Sequential model
    """
    # create model
    model = Sequential()

    num_pixels = 784
    # model.add(Dense(num_pixels, input_dim=num_pixels, init='normal', activation='relu'))
    model.add(Dense(num_pixels, input_dim=num_pixels, activation='relu'))
    num_classes = 10
    # model.add(Dense(num_classes, init='normal', activation='softmax'))
    model.add(Dense(num_classes, activation='softmax'))
    # Compile model
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model
94 |
95 |
def train_and_evaluate():
    """Train the baseline net for 10 epochs and print the test error rate.

    NOTE(review): nb_epoch is the legacy keras 1.x spelling; newer keras
    expects epochs= — confirm the pinned keras version.
    """
    X_train, y_train, X_test, y_test = generate_data()
    model = baseline_model()
    model.fit(X_train, y_train, validation_data=(X_test, y_test), nb_epoch=10, batch_size=200, verbose=2)
    # Final evaluation of the model
    scores = model.evaluate(X_test, y_test, verbose=0)
    # scores[1] is accuracy; report (1 - accuracy) as a percentage
    print("Baseline Error: %.2f%%" % (100-scores[1]*100))
103 |
104 |
if __name__ == '__main__':
    # load_data()
    # runs the data pipeline only; uncomment train_and_evaluate() to fit
    generate_data()
    # train_and_evaluate()
    pass
110 |
--------------------------------------------------------------------------------
/python_utils/sk_sc_pd_operator/pd_pivot.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 |
3 | import numpy as np
4 | import pandas as pd
5 | import seaborn as sns
6 | import matplotlib.pyplot as plt
7 | sns.set() # use Seaborn styles
8 |
9 |
def titanic_1():
    """Titanic survival rates via groupby vs. pivot_table (they agree).

    NOTE(review): sns.load_dataset downloads the CSV on first use — needs
    network access.
    """
    titanic = sns.load_dataset('titanic')
    print titanic.head()
    # survived  pclass  sex  age ......
    # 0 0 male 22
    # 1 1 1 female 38.0
    # 2 1 3 female 26.0
    # 3 1 1 female 35.0
    # 4 0 3 male 35.0

    # survival rate by sex
    print titanic.groupby('sex')[['survived']].mean()
    #         survived
    # sex
    # female  0.742038
    # male    0.188908

    # two-level groupby, unstacked into a sex x class table
    print titanic.groupby(['sex', 'class'])['survived'].aggregate('mean').unstack()
    # class      First    Second     Third
    # sex
    # female  0.968085  0.921053  0.500000
    # male    0.368852  0.157407  0.135447

    # pivot_table gives the same table in one call
    print titanic.pivot_table('survived', index='sex', columns='class')
    # class      First    Second     Third
    # sex
    # female  0.968085  0.921053  0.500000
    # male    0.368852  0.157407  0.135447

    # add an age dimension by binning into (0, 18] and (18, 80]
    age = pd.cut(titanic['age'], [0, 18, 80])
    print titanic.pivot_table('survived', ['sex', age], 'class')
    # class               First    Second     Third
    # sex    age
    # female (0, 18]   0.909091  1.000000  0.511628
    #        (18, 80]  0.972973  0.900000  0.423729
    # male   (0, 18]   0.800000  0.600000  0.215686
    #        (18, 80]  0.375000  0.071429  0.133663

    # different aggregations per column: survivor counts and mean fares
    print titanic.pivot_table(index='sex', columns='class',
                              aggfunc={'survived': sum, 'fare': 'mean'})

    # margins=True appends row/column totals under 'All'
    print titanic.pivot_table('survived', index='sex', columns='class', margins=True)
    # class      First    Second     Third       All
    # sex
    # female  0.968085  0.921053  0.500000  0.742038
    # male    0.368852  0.157407  0.135447  0.188908
    # All     0.629630  0.472826  0.242363  0.383838
56 |
57 |
def births_demo():
    """Pivot a births CSV by decade/gender and plot yearly and weekday trends.

    NOTE(review): reads a hard-coded local Windows path — parameterize or
    document where births.csv comes from before sharing this.
    :return: None; blocks twice on plt.show()
    """
    path = 'E:/python_code/births.csv'
    births = pd.read_csv(path)
    print births.head()
    #    year  month  day gender  births
    # 0  1969      1    1      F    4046
    # 1  1969      1    1      M    4440
    # 2  1969      1    2      F    4454
    # 3  1969      1    2      M    4548
    # 4  1969      1    3      F    4548

    # floor each year to its decade (1969 -> 1960)
    births['decade'] = 10 * (births['year'] // 10)
    print births.pivot_table('births', index='decade', columns='gender', aggfunc='sum')
    # gender         F         M
    # decade
    # 1960     1753634   1846572
    # 1970    16263075  17121550
    # 1980    18310351  19243452
    # 1990    19479454  20420553
    # 2000    18229309  19106428

    # yearly totals per gender as a line plot
    births.pivot_table('births', index='year', columns='gender', aggfunc='sum').plot()

    plt.ylabel('total births per year')
    plt.show()

    # create a datetime index from the year, month, day
    # (yyyymmdd as an integer, parsed with format '%Y%m%d')
    births.index = pd.to_datetime(10000 * births.year +
                                  100 * births.month +
                                  births.day, format='%Y%m%d')

    births['dayofweek'] = births.index.dayofweek
    # mean births per weekday, one line per decade
    births.pivot_table('births', index='dayofweek',
                       columns='decade', aggfunc='mean').plot()

    plt.gca().set_xticklabels(['Mon', 'Tues', 'Wed', 'Thurs', 'Fri', 'Sat', 'Sun'])

    plt.ylabel('mean births by day')
    plt.show()
97 |
if __name__ == '__main__':
    # births_demo reads a local CSV; titanic_1 downloads its dataset
    births_demo()
    # titanic_1()
    pass
102 |
--------------------------------------------------------------------------------
/python_utils/thread_process/thread_queue.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 | """
3 |
4 | """
5 | import Queue
6 | import threading
7 | import logging
8 | import random
9 | import time
10 | from basic_thread import join_all_others_thread
11 | logging.basicConfig(level=logging.DEBUG,
12 | format='%(levelname)s %(asctime)s %(threadName)s %(message)s',
13 | datefmt='%Y-%m-%d %I:%M:%S')
14 | lst_que = Queue.Queue()
15 |
16 |
def produce_item():
    """Return a (thread-name, random int 0..10) tuple tagging the producer.

    Fix: removed the unreachable ``pass`` after ``return``;
    threading.current_thread() is the canonical spelling on Python 2.6+
    and 3 (currentThread is a legacy alias).
    """
    return threading.current_thread().name, random.randint(0, 10)
20 |
21 |
def producer(num):
    """Push `num` produced items onto the shared queue, one every 0.5 s."""
    for _ in range(num):
        item = produce_item()
        lst_que.put(item)
        logging.info('produce item : ' + str(item))
        time.sleep(0.5)
28 |
29 |
def consume():
    """Drain the shared queue forever; back off 1 s whenever it runs empty.

    Fix: ``except Queue.Empty, e`` is Python-2-only syntax; ``as e`` works
    on Python 2.6+ and matches the exception style used elsewhere in this
    repo (e.g. set_ope.py).
    """
    while True:
        try:
            # non-blocking get: raises Queue.Empty immediately when empty
            item = lst_que.get(False)
            if item:
                logging.debug('consume item: ' + str(item))
                time.sleep(0.5)
        except Queue.Empty as e:
            # if lst_que is empty then do the following code snippet
            logging.warn('queue empty ' + str(e) + 'now sleep 1 S')
            time.sleep(1)
42 |
43 |
def create_mul_thread(thread_num, prefix_name, target_name):
    """
    Create and start `thread_num` threads all running the same task.

    :param thread_num: number of threads to start
    :param prefix_name: thread-name prefix; the special value 'consume--'
        marks consumer threads, which are started without arguments
    :param target_name: the callable each thread runs
    :return: None
    """
    for i in range(thread_num):
        t_name = prefix_name + str(i)
        if prefix_name == 'consume--':
            # consumers loop forever and take no arguments
            t = threading.Thread(name=t_name, target=target_name)
        else:
            # FIX: only draw the random workload for producers; the original
            # drew (and discarded) a random number for consumers as well
            produce_num = random.randint(10, 100)
            t = threading.Thread(name=t_name, target=target_name, args=(produce_num, ))
        t.start()
60 |
61 |
def create_mul_thread_producer(num):
    """Start `num` producer threads, each given a random workload size."""
    for idx in range(num):
        worker = threading.Thread(name='producer--' + str(idx),
                                  target=producer,
                                  args=(random.randint(10, 100), ))
        worker.start()
68 |
69 |
def test_consume_produce_queue():
    """Spin up 2 producer threads and 3 consumer threads on the shared queue."""
    create_mul_thread(2, 'producer--', producer)
    create_mul_thread(3, 'consume--', consume)
77 |
78 |
def consume_echo():
    """Blocking consumer: log every item taken from the shared queue."""
    logging.info('set gpu mode, load caffe net')
    while True:
        # blocking get -- waits until an item is available
        task = lst_que.get(True)
        logging.info('recognize %s', task)
84 |
85 |
def get_input_text():
    """
    Read sentences from stdin into the shared queue until 'exit' is typed.

    Note the 'exit' sentinel itself is enqueued before the loop stops, so
    consumers also see it.
    """
    while True:
        text = raw_input("please input a sentence")
        lst_que.put(text, True)
        if text == 'exit':
            break
93 |
94 |
def create_echo_cp():
    """
    Start two consume_echo threads plus one get_input_text thread, then
    wait for every non-main thread to finish.
    """
    workers = [threading.Thread(name="c1", target=consume_echo),
               threading.Thread(name="c2", target=consume_echo),
               threading.Thread(name="p1", target=get_input_text)]
    for worker in workers:
        worker.start()
    join_all_others_thread()
106 |
107 |
def get_que_len():
    """Show Queue.qsize() growing as items accumulate (1, 3, 5 items)."""
    global lst_que

    for batch in (1, 2, 2):
        for _ in range(batch):
            lst_que.put('abc')
        print(lst_que.qsize())
119 |
if __name__ == '__main__':
    # test_consume_produce_queue()
    # create_echo_cp()
    s = 'abc' + '123'
    print(s)
127 |
--------------------------------------------------------------------------------
/python_utils/numpy_operate/array_create.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 |
3 | """
4 | This file is about ways to create different types of np.array(),
5 | like identity, diagonal matrix and so on.
6 | """
7 |
8 | import numpy as np
9 |
10 |
def common_create():
    """
    Plain np.array() construction from nested lists.
    :return: none
    """
    vec = np.array([1, 2])
    print(vec)            # [1 2]
    print(vec.shape)      # (2,)

    mat = np.array([[1, 2], [3.1, 4.]])
    print(mat)
    # [[ 1.   2. ]
    #  [ 3.1  4. ]]
    print(mat.shape)      # (2, 2)

    cmat = np.array([[1, 2], [3, 4]], dtype=complex)
    print(cmat)
    # [[ 1.+0.j  2.+0.j]
    #  [ 3.+0.j  4.+0.j]]
31 |
32 |
def about_shape():
    """Show .shape for 1-D vs 2-D arrays, broadcasting, and a dot product."""
    print(np.array([[1, 2, 3], [3, 4, 5]]).shape)   # (2, 3)
    print(np.array([[1, 2, 3]]).shape)              # (1, 3)
    print(np.array([1, 2, 3]).shape)                # (3,)

    mat = np.array([[1, 2, 3], [3, 4, 5]])
    vec = np.array([1, 2, 3])
    # element-wise product: vec is broadcast across each row of mat
    print(mat * vec)
    # [[ 1  4  9]
    #  [ 3  8 15]]

    # matrix-vector product
    print(np.dot(mat, vec))
    # [14 26]
50 |
51 |
def about_reshape():
    """Reshape between 2-D and 3-D; element order is preserved."""
    flat = np.array([[1, 2, 3], [4, 5, 6], [10, 11, 12], [13, 14, 15]])
    print(flat.reshape(2, 6))
    # [[ 1  2  3  4  5  6]
    #  [10 11 12 13 14 15]]

    cube = np.arange(1, 13).reshape((2, 2, 3))
    print(cube)
    # two 2x3 blocks holding 1..12
    print(cube.reshape((2, 6)))
    # [[ 1  2  3  4  5  6]
    #  [ 7  8  9 10 11 12]]
67 |
68 |
def lst_2_array():
    """
    Convert list and tuple to numpy arrays via array()/asarray().
    :return: none
    """
    pair_lst = [[1, 2], [3, 4]]
    print(np.array(pair_lst).shape)   # (2, 2)
    print(np.array(pair_lst))
    # [[1 2]
    #  [3 4]]
    print(np.asarray(pair_lst))
    # same result as np.array() here
    print(np.asarray((1, 2, 3)))      # [1 2 3] -- tuples work too
86 |
87 |
def file_2np_arr(data_path='../machine_learn/dataset/perception/dataset.txt'):
    """
    Load a whitespace-delimited text file into a float numpy array.

    :param data_path: path of the text file; defaults to the perception
        dataset path the original hard-coded
    :return: the loaded 2-D float array (also printed, as before)
    """
    x = np.loadtxt(data_path, dtype=float)
    print(x)
    # e.g. [[ 1.1  2.2  0. ]
    #       [ 3.5  3.6  1. ]]
    return x
95 |
96 |
def empty_arr():
    """np.empty allocates without initialising -- contents are arbitrary."""
    base = np.arange(12).reshape(3, 4)
    print(base)
    # [[ 0  1  2  3]
    #  [ 4  5  6  7]
    #  [ 8  9 10 11]]
    scratch = np.empty(base.shape)
    # values are whatever happened to be in memory (often, not always, zeros)
    print(scratch)
108 |
109 |
def test_ndim():
    """ndim counts array dimensions: 1-D, 2-D and 3-D examples."""
    vec = np.array([1, 2, 3])
    print(vec.ndim)    # 1

    mat = np.array([[1, 2, 3], [4, 5, 6]])
    print(mat.ndim)    # 2

    cube = np.arange(12).reshape((2, 2, 3))
    print(cube.ndim)   # 3
124 |
125 |
if __name__ == '__main__':
    test_ndim()
    # empty_arr()
    # file_2np_arr()
    # broadcast_demo()
    # about_shape()
    # common_create()
    # lst_2_array()
    # about_reshape()
    # scratch area: row slicing examples
    sample = [[1.1, 2.3, 3], [3, 4, 5]]
    arr = np.array(sample)
    # arr[1, :] and arr[1] both give [ 3.  4.  5.]

    a = np.array([[1.1, 2.3, 3]])
    # "-".join(str(v) for v in a[0].tolist()) -> '1.1-2.3-3.0'
    pass
152 |
--------------------------------------------------------------------------------
/python_utils/utils/JsonUtil.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 | import json
3 | import codecs
4 |
5 |
def byteify(input):
    """
    Recursively convert json-decoded unicode strings to byte str (Python 2).

    On Python 3 (where ``str is bytes`` is False) strings are already
    decoded, so values pass through unchanged.

    :param input: {u'first_name': u'Guido', u'last_name': u'jack'}
    :return: {'first_name': 'Guido', 'last_name': 'jack'}
    """
    if isinstance(input, dict):
        # items() instead of Python-2-only iteritems(); same result on py2
        return {byteify(key): byteify(value)
                for key, value in input.items()}
    elif isinstance(input, list):
        return [byteify(element) for element in input]
    elif str is bytes and isinstance(input, unicode):
        # `unicode` only exists on Python 2; the str-is-bytes guard keeps
        # this branch from ever evaluating the name on Python 3
        return input.encode('utf-8')
    else:
        return input
20 | else:
21 | return input
22 |
23 |
def get_json_from_file(filename):
    """
    Load a JSON file and convert its unicode strings via byteify().

    Note: if a hand-made json.txt fails to load, create the file as
    file.json inside PyCharm and read that instead.
    """
    with open(filename) as jf:
        return byteify(json.load(jf))
30 |
31 |
def put_unicode_to_str():
    """Dump a dict containing a unicode value without ASCII-escaping it."""
    payload = {"ocrMsg": u"\u556a\u556a\u76f4\u64ad\u514d\u8d39\u8bd5"}
    print(json.dumps(payload, indent=4, ensure_ascii=False))
36 |
37 |
def generate_keyword_jsonfile(lst, out_path='../config/search_keywords.json'):
    """
    Group keyword records by type id and write them to a JSON file.

    :param lst: iterable of records where item[0] is the type id (0, 1 or 2)
        and item[1] is the keyword
    :param out_path: destination file; defaults to the config directory
        path the original hard-coded
    :return: None
    """
    js_data = {"0": [], "1": [], "2": []}
    for item in lst:
        # avoid shadowing the builtin `id`
        type_id, value = str(item[0]), item[1]
        js_data[type_id].append(value)

    with codecs.open(out_path, 'w', encoding='utf-8') as fp:
        # dumps + write (rather than json.dump) so that ensure_ascii=False
        # output is written as a unicode string on Python 2
        fp.write(json.dumps(js_data, indent=4, sort_keys=True, ensure_ascii=False))
53 |
if __name__ == '__main__':
    # generate_keyword_jsonfile()
    # print get_json_from_file('../config/search_keywords.json')
    # sample fastjson-style responses (with "$ref" back-references) kept for
    # manual inspection; each assignment below overwrites the previous `js`
    js = {"aaData":[ {"id":21,"keyword":"\u8D85\u7BA1","keywordType":0}, {"id":43,"keyword":"\u516C\u5B89","keywordType":0}, {"id":44,"keyword":"\u519B\u88C5","keywordType":1}, {"id":45,"keyword":"\u66B4\u529B","keywordType":1}, {"id":46,"keyword":"\u519B\u670D","keywordType":2}, {"id":47,"keyword":"\u9732\u4E73","keywordType":2},], "data":[{"$ref":"$.aaData[0]"},{"$ref":"$.aaData[1]"}, {"$ref":"$.aaData[2]"},{"$ref":"$.aaData[3]"}, {"$ref":"$.aaData[2]"},{"$ref":"$.aaData[3]"}], "error":False,"iTotalDisplayRecords":6,"iTotalRecords":6,"recordsFiltered":6,"recordsTotal":6,"sEcho":"1","success":True}
    js = {"aaData":[{"id":195261,"keywordCore":"直播","keywordDepartment":"YY","keywordWarn":"裸聊","newsDate":"2017-02-28 20:14:01","newsTitle":"美女直播","updateKeywordDate":"2017-02-28 20:14:01","url":"http://www.junjiewang.com\/44756.html","webSrc":"junjie"}, {"id":195258,"keywordCore":"直播","keywordDepartment":"YY","keywordWarn":"裸聊","newsDate":"2017-02-28 20:13:58","newsTitle":"美女直播","updateKeywordDate":"2017-02-28 20:13:58","url":"http:www.junjiewang.com/45345.html","webSrc":"junjie"},], "contentList":[{"contentNum":67,"dateDay":"2017-01-08"},{"contentNum":20,"dateDay":"2017-01-09"}], "data":[{"$ref":"$.aaData[0]"},{"$ref":"$.aaData[1]"}], "error":False,"iTotalDisplayRecords":2,"iTotalRecords":2,"recordsFiltered":2,"recordsTotal":2,"sEcho":"3","success":True, "warnList":[{"contentNum":28,"dateDay":"2017-01-08"},{"contentNum":8,"dateDay":"2017-01-09"}]}
    js = {"aaData":[{"id":195261,"keywordCore":"\u76F4\u64AD","keywordDepartment":"YY","keywordWarn":"\u88F8\u804A","newsDate":"2017-02-28 20:14:01","newsTitle":"\u76F4\u64AD\u65B0\u89C4\u4ECA\u8D77\u5B9E\u65BD\uFF0C\u4F60\u5E94\u8BE5\u77E5\u9053\u7684\u516D\u4E2A\u95EE\u9898","updateKeywordDate":"2017-02-28 20:14:01","url":"http:\/\/www.junjiewang.com\/hulianwang\/44756.html","webSrc":"junjie"}],"data":[{"$ref":"$.aaData[0]"}],"error":False,"iTotalDisplayRecords":1,"iTotalRecords":1,"recordsFiltered":1,"recordsTotal":1,"sEcho":"11","success":True}
    # print json.dumps(js, indent=4)
    put_unicode_to_str()
62 |
63 |
--------------------------------------------------------------------------------
/python_utils/machine_learn/perception/perception.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 | import numpy as np
3 | from utils.FileUtil import get_line_lst
4 | import matplotlib.pyplot as plt
5 |
6 |
class Perception(object):
    """A linear perceptron classifier trained with mini-batch SGD."""

    def __init__(self, var_num):
        # weights start at all-ones, bias at 1
        # self.w = np.random.randn(1, var_num)
        self.w = np.ones(var_num)
        self.b = 1
        self.var_num = var_num
        # stop training once the error rate drops to this level
        self.min_error_rate = 0.02

    def train(self, train_data, eta):
        """
        One pass of perceptron updates over `train_data`.

        :param train_data: array like [[1, 2, 0], [1.1, 0.8, 1]]; the last
            column of each row is the label
        :param eta: learning rate
        :return: None
        """
        for sample in train_data:
            features, label = sample[0:-1], sample[-1]
            # functional margin: non-positive means misclassified
            margin = (np.dot(self.w, features) + self.b) * label
            if margin <= 0:
                self.w += eta * label * features
                self.b += eta * label

    def sgd(self, train_data, epoch, eta, batch_size):
        """
        Train the perceptron by mini-batch stochastic gradient descent.

        :param train_data: 2-D array like [[1.1, 2.3, -1]]; the last item
            of each row is the label
        :param epoch: maximum number of passes over the data
        :param eta: learning rate
        :param batch_size: mini-batch size
        :return: None
        """
        for ep in range(epoch):
            np.random.shuffle(train_data)
            for start in range(0, len(train_data), batch_size):
                self.train(train_data[start:start + batch_size], eta)

            rate = self.get_error_rate(train_data)
            print('epoch {0} current_error_rate: {1}'.format(ep + 1, rate))
            print(self.get_current_para())
            if rate <= self.min_error_rate:
                break

    def get_error_rate(self, validate_data):
        """Return the fraction of rows in `validate_data` misclassified."""
        errors = 0
        for sample in validate_data:
            raw = np.dot(self.w, sample[0:-1]) + self.b
            predicted = 1 if raw >= 0 else -1
            if predicted != sample[-1]:
                errors += 1

        return float(errors) / validate_data.shape[0]

    def get_current_para(self):
        """Return (weights, bias)."""
        return self.w, self.b

    def get_weight(self):
        return self.w

    def get_bias(self):
        return self.b
70 |
71 |
def generate_data(data_path):
    """
    Read a whitespace-separated dataset file into a float array, then remap
    label 0 to -1 (the perceptron expects labels in {-1, +1}).

    :param data_path: path of the dataset file; last column is the label
    :return: 2-D float numpy array
    """
    raw_lines = get_line_lst(data_path)
    ret_arr = np.array([[float(tok) for tok in line.split()]
                        for line in raw_lines])

    # flip 0-labels in place via a boolean mask on the label column
    labels = ret_arr[:, -1]
    labels[labels == 0] = -1

    return ret_arr
89 |
90 |
def plot_data_scatter(train_data, w, b):
    """Scatter the samples (blue = +1, red = -1) and draw the decision line."""
    x = np.linspace(-5, 5, 10)
    plt.figure()
    # scatter each sample, coloured by its label
    for sample in train_data:
        colour = u'b' if sample[-1] == 1 else u'r'
        plt.scatter(sample[0], sample[1], c=colour)

    # decision boundary: w0*x + w1*y + b = 0  =>  y = -(w0*x + b) / w1
    plt.plot(x, -(w[0] * x + b) / w[1], c=u'r')
    plt.show()
104 |
if __name__ == '__main__':
    # train a 2-feature perceptron on the bundled dataset, then plot the
    # samples together with the learned decision boundary
    data_path = '../dataset/perception/dataset.txt'
    train_data = generate_data(data_path)
    epoch, eta, var_num, batch_size = 100, 0.1, 2, 20
    p = Perception(var_num)
    p.sgd(train_data, epoch, eta, batch_size)
    plot_data_scatter(train_data, p.get_weight(), p.get_bias())
112 |
--------------------------------------------------------------------------------
/python_utils/al_lt_common/al_cv.py:
--------------------------------------------------------------------------------
1 | # coding:utf-8
2 |
3 | import numpy as np
4 | import cv2
5 |
6 |
def NMS(bboxes, threshold=0.5, model='union'):
    """
    Non max suppression.

    :param bboxes: array of bounding boxes and scores sized [N, 5]
        as (x1, y1, x2, y2, score)
    :param threshold: float overlap threshold; boxes overlapping a kept box
        by more than this are suppressed
    :param model: str 'union' (IoU) or 'min' (intersection over the smaller
        area); FIX: the parameter was previously accepted but ignored
    :return:
        bboxes after nms
        picked indices
    """
    x1 = bboxes[:, 0]
    y1 = bboxes[:, 1]
    x2 = bboxes[:, 2]
    y2 = bboxes[:, 3]
    scores = bboxes[:, 4]

    # all the box areas (+1: inclusive pixel coordinates)
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    # indices sorted by descending score
    orders = np.argsort(-scores)

    # store the saved indices of the bounding boxes
    keep_idx = []

    while len(orders) > 0:
        idx = orders[0]
        keep_idx.append(idx)

        # vectorised intersection of the kept box with all remaining boxes
        xx1 = np.maximum(x1[idx], x1[orders[1:]])
        yy1 = np.maximum(y1[idx], y1[orders[1:]])
        xx2 = np.minimum(x2[idx], x2[orders[1:]])
        yy2 = np.minimum(y2[idx], y2[orders[1:]])

        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        # all the intersection areas (tensor operation)
        inter = w * h

        if model == 'min':
            # overlap relative to the smaller of the two areas
            overlap_ratio = inter / np.minimum(areas[idx], areas[orders[1:]])
        else:
            # 'union': intersection over union
            overlap_ratio = inter / (areas[idx] + areas[orders[1:]] - inter)

        inds = np.where(overlap_ratio <= threshold)[0]
        orders = orders[inds + 1]  # add 1, because the first is the kept index

    return bboxes[keep_idx], keep_idx
53 |
54 |
def draw_rect_score(img, lst_box):
    """Draw every box as a red rectangle with its score as blue text."""
    for box in lst_box:
        top_left = (int(box[0]), int(box[1]))
        bottom_right = (int(box[2]), int(box[3]))
        cv2.rectangle(img, top_left, bottom_right, (0, 0, 255), 2)
        cv2.putText(img, str(box[4]), top_left,
                    cv2.FONT_HERSHEY_SIMPLEX, 1.1, (255, 0, 0), 2)
62 |
63 |
def compute_iou(box1, box2):
    """
    Compute the intersection-over-union rate of two boxes.

    :param box1: lst [x1, y1, x2, y2, ...]
    :param box2: like box1
    :return: float iou rate
    """
    # corners of the intersection rectangle
    ix1, iy1 = max(box1[0], box2[0]), max(box1[1], box2[1])
    ix2, iy2 = min(box1[2], box2[2]), min(box1[3], box2[3])

    # clamp to zero when the boxes do not overlap (+1: inclusive coords)
    inter_area = max(ix2 - ix1 + 1, 0.0) * max(iy2 - iy1 + 1, 0.0)

    area1 = (box1[2] - box1[0] + 1) * (box1[3] - box1[1] + 1)
    area2 = (box2[2] - box2[0] + 1) * (box2[3] - box2[1] + 1)

    return inter_area / float(area1 + area2 - inter_area)
85 |
86 |
def test_iou_rate():
    """Test iou compute.

    FIX: the original assigned box2 twice in group 1; the first (dead)
    assignment is removed.
    """
    # box group1
    box1 = [146, 173, 240, 263]
    box2 = [174, 196, 266, 282]
    print(compute_iou(box1, box2))  # 0.37405

    # box group2
    box1 = [556, 102, 648, 198]
    box2 = [570, 133, 670, 228]
    print(compute_iou(box1, box2))  # 0.38613
100 |
101 |
def test_nms():
    """Visual check: draw boxes before and after NMS on a sample image."""
    img_path = 'E:/bolg_img/deeplearn/nms/nms_4.jpg'
    lst_box = np.array([[146, 173, 240, 263, 0.98],
                        [160, 152, 251, 245, 0.83],
                        [174, 196, 266, 282, 0.75],
                        [556, 102, 648, 198, 0.81],
                        [570, 133, 670, 228, 0.67]])

    img = cv2.imread(img_path)
    img_copy = img.copy()
    draw_rect_score(img, lst_box)

    lst_box, _ = NMS(lst_box, 0.2)
    draw_rect_score(img_copy, lst_box)

    cv2.imshow("img_with_rect", img)
    cv2.imshow("nms_img_with_rect", img_copy)

    cv2.waitKey(0)
122 |
123 |
if __name__ == '__main__':
    test_iou_rate()
    # test_nms()

    arr = np.array([4, 1, 3, 5])
    print(arr.size)
132 |
--------------------------------------------------------------------------------
/python_utils/matplot/plot_many.py:
--------------------------------------------------------------------------------
1 | # _*_ coding:utf-8 _*_
2 | """
3 | This file is about subplot
4 | mainly cited from `http://matplotlib.org/examples/pylab_examples/subplots_demo.html`
5 | """
6 |
7 | import numpy as np
8 | import matplotlib.pyplot as plt
9 |
10 |
def plot_mul():
    """Stack four axes manually with plt.axes(); lower three share x."""
    left, width = 0.1, 0.8
    ax_top = plt.axes([left, 0.5, width, 0.45])
    ax_mid = plt.axes([left, 0.3, width, 0.19])
    ax_low = plt.axes([left, 0.2, width, 0.09], sharex=ax_mid)
    ax_bottom = plt.axes([left, 0.1, width, 0.09], sharex=ax_mid)

    # ticks only at the very top of the top plot
    ax_top.xaxis.tick_top()
    # no x ticks on the two middle plots
    ax_mid.xaxis.set_visible(False)
    ax_low.xaxis.set_visible(False)

    # and only bottom ticks on the last plot
    ax_bottom.xaxis.tick_bottom()
    plt.show()
27 |
28 |
def subplot_demo1():
    """A figure with a single subplot."""
    xs = np.linspace(0, 2 * np.pi, 400)
    ys = np.sin(xs ** 2)

    fig, axis = plt.subplots()
    axis.plot(xs, ys)
    axis.set_title('Simple plot')
    plt.show()
39 |
40 |
def subplot_demo2():
    """Two vertically stacked subplots sharing the x axis."""
    xs = np.linspace(0, 2 * np.pi, 400)
    ys = np.sin(xs ** 2)

    fig, axes = plt.subplots(2, sharex=True)
    axes[0].plot(xs, ys)
    axes[0].set_title('sharing x axis')
    axes[1].scatter(xs, ys)
    plt.show()
50 |
51 |
def subplot_demo3():
    """Two side-by-side subplots sharing the y axis."""
    xs = np.linspace(0, 2 * np.pi, 300)
    ys = np.sin(xs ** 2)

    fig, (left_ax, right_ax) = plt.subplots(1, 2, sharey=True)
    left_ax.plot(xs, ys)
    left_ax.set_title('Sharing Y axis')
    right_ax.scatter(xs, ys)

    plt.show()
63 |
64 |
def subplots_demo4():
    """Three stacked subplots sharing x; the last shows unrelated data."""
    xs = np.linspace(0, 2 * np.pi, 300)
    ys = np.sin(xs ** 2)

    fig, (top, middle, bottom) = plt.subplots(3, sharex=True)
    top.plot(xs, ys)
    top.set_title('sharing x axis')

    middle.scatter(xs, ys)

    bottom.scatter([1, 2, 3, 4, 5, 6, 7],
                   [2.6, 3.6, 8.3, 56, 12.7, 8.9, 5.3])
    plt.show()
79 |
80 |
def subplots_demo5():
    """A 2x2 grid sharing x per column and y per row."""
    xs = np.linspace(0, 2 * np.pi, 300)
    ys = np.sin(xs ** 2)
    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, sharex='col', sharey='row')
    ax1.plot(xs, ys)
    ax1.set_title('Sharing x per column, y per row')
    ax2.scatter(xs, ys)
    ax2.set_title('ax2')

    # same curve in the bottom row, drawn in different colours
    curve = 2 * ys ** 2 + 1
    ax3.plot(xs, curve, color='r')
    ax3.set_title('ax3')
    ax4.plot(xs, curve, color='b')
    ax4.set_title('ax4')

    plt.show()
97 |
98 |
def subplots_demo6():
    """A 2x2 grid indexed as an array, hiding the inner tick labels."""
    xs = np.linspace(0, 2 * np.pi, 300)
    ys = np.sin(xs ** 2)
    fig, axes = plt.subplots(2, 2)

    axes[0, 0].plot(xs, ys)
    axes[0, 0].set_title('axis 0, 0')

    axes[0, 1].scatter(xs, ys)
    axes[0, 1].set_title('axis 0, 1')

    axes[1, 0].plot(xs, ys ** 2)
    axes[1, 0].set_title('axis 1, 0')

    axes[1, 1].scatter(xs, ys ** 2)
    axes[1, 1].set_title('axis 1, 1')

    # hide x tick labels across the whole top row
    plt.setp([ax.get_xticklabels() for ax in axes[0, :]], visible=False)
    # hide y tick labels down the whole right column
    plt.setp([ax.get_yticklabels() for ax in axes[:, 1]], visible=False)
    plt.show()
120 |
121 |
def plot_fun():
    """Plot -x*log2(x) over (0, 1] (the single-symbol entropy term)."""
    xs = np.linspace(0.0, 1.0, 1000)
    plt.plot(xs, -np.log2(xs) * xs)
    plt.xlim(0, 1)
    plt.ylim(0, 1)
    plt.show()
128 |
129 |
def plot_sigmoid():
    """Plot the logistic sigmoid 1/(1+exp(-x)) over [-10, 10)."""
    def sigmoid(x):
        # BUG FIX: was 1/(1+exp(x)), which is the mirror image 1-sigmoid(x);
        # the logistic sigmoid uses exp(-x)
        return 1.0 / (1 + np.exp(-x))

    x = np.arange(-10., 10., 0.1)
    plt.plot(x, sigmoid(x))
    plt.show()
137 |
138 |
if __name__ == '__main__':
    plot_sigmoid()
    # plot_fun()
    # plot_mul()
    # subplot_demo1()
    # subplot_demo2()
    # subplot_demo3()
    # subplots_demo4()
    # subplots_demo5()
    # subplots_demo6()
150 |
--------------------------------------------------------------------------------
/python_utils/sk_sc_pd_operator/pd_miss_data.py:
--------------------------------------------------------------------------------
1 | # coding:utf-8
2 |
3 | import numpy as np
4 | import pandas as pd
5 |
6 |
def miss_series():
    """isnull/notnull/dropna on a Series holding NaN and None."""
    series = pd.Series([1, np.nan, 'hello', None])

    print(series.isnull())
    # 0    False
    # 1     True
    # 2    False
    # 3     True
    # dtype: bool

    # masking with notnull() keeps rows 0 (1) and 2 ('hello')
    print(series[series.notnull()])

    # dropna() gives the same result as the notnull() mask above
    print(series.dropna())
24 |
25 |
def miss_df():
    """dropna variants on a DataFrame: rows, columns, how='all', thresh."""
    frame = pd.DataFrame([[1, np.nan, 2],
                          [2, 3, 5],
                          [np.nan, 4, 6]])

    print(frame)
    #      0    1  2
    # 0  1.0  NaN  2
    # 1  2.0  3.0  5
    # 2  NaN  4.0  6

    # drop every row containing any NaN -> only row 1 survives
    print(frame.dropna())

    # drop every column containing any NaN -> only column 2 survives
    print(frame.dropna(axis=1))

    frame[3] = np.nan
    print(frame)
    #      0    1  2   3
    # 0  1.0  NaN  2 NaN
    # 1  2.0  3.0  5 NaN
    # 2  NaN  4.0  6 NaN

    # drop only columns that are entirely NaN (column 3)
    print(frame.dropna(axis='columns', how='all'))

    # keep rows with at least 3 non-NaN values (row 1 only)
    print(frame.dropna(axis='rows', thresh=3))
59 |
60 |
def fill_series():
    """fillna on a Series: value fill (as a copy), forward fill, back fill."""
    data = pd.Series([1, np.nan, 2, None, 3], index=list('abcde'))
    print(data)
    # a 1.0, b NaN, c 2.0, d NaN, e 3.0

    # fillna returns a copy; `data` itself is left untouched here
    data.fillna(0)
    print(data)
    # still contains NaN at b and d

    # forward-fill propagates the previous value forward
    print(data.fillna(method='ffill'))
    # a 1.0, b 1.0, c 2.0, d 2.0, e 3.0

    # back-fill propagates the next value backward
    print(data.fillna(method='bfill'))
    # a 1.0, b 2.0, c 2.0, d 3.0, e 3.0
96 |
97 |
def fill_df():
    """Row-wise forward fill across DataFrame columns."""
    frame = pd.DataFrame([[1, np.nan, 2],
                          [2, 3, 5],
                          [np.nan, 4, 6]])

    frame[3] = np.nan
    print(frame)
    #      0    1  2   3
    # 0  1.0  NaN  2 NaN
    # 1  2.0  3.0  5 NaN
    # 2  NaN  4.0  6 NaN

    # axis=1 fills each row left-to-right with the last seen value
    print(frame.fillna(method='ffill', axis=1))
    #      0    1    2    3
    # 0  1.0  1.0  2.0  2.0
    # 1  2.0  3.0  5.0  5.0
    # 2  NaN  4.0  6.0  6.0
115 |
116 |
def drop_specify():
    """Drop rows by label, a column by name, and rows by mask/position."""
    records = {'name': ['Jason', 'Molly', 'Tina', 'Jake', 'Amy'],
               'year': [2012, 2012, 2013, 2014, 2014],
               'reports': [4, 24, 31, 2, 3]}

    df = pd.DataFrame(records,
                      index=['Cochice', 'Pima', 'Santa Cruz', 'Maricopa', 'Yuma'])

    print(df)
    #               name  reports  year
    # Cochice      Jason        4  2012
    # Pima         Molly       24  2012
    # Santa Cruz    Tina       31  2013
    # Maricopa      Jake        2  2014
    # Yuma           Amy        3  2014

    # drop rows by index label
    print(df.drop(['Cochice', 'Pima']))

    # drop a column by name
    print(df.drop('reports', axis=1))

    # boolean filtering on a column
    print(df[df.name != 'Tina'])

    # drop a single row by position, then two rows by position
    print(df.drop(df.index[2]))
    print(df.drop(df.index[[2, 3]]))
152 |
if __name__ == '__main__':
    drop_specify()
    # fill_df()
    # fill_series()
    # miss_df()
    # miss_series()
160 |
--------------------------------------------------------------------------------
/python_utils/sk_sc_pd_operator/sk_feature_process.py:
--------------------------------------------------------------------------------
1 | # coding:utf-8
2 |
3 | from sklearn import datasets
4 | from sklearn.feature_selection import RFE
5 | from sklearn.linear_model import LogisticRegression
6 | import numpy as np
7 |
8 |
def get_dummy_data():
    """
    Build a tiny toy dataset: 6 samples x 3 features with binary labels.

    :return: (x, y) where x has shape (6, 3) and y has shape (6,)
    """
    # features are laid out one row per feature, then transposed
    features = np.array([[-1, 1, -1, 1, -1, 1],
                         [0.1, 0.2, 0.3, 0.4, 0.5, 0.6],
                         [0.6, 0.5, 0.4, 0.3, 0.2, 0.1]]).T
    labels = np.array([0, 0, 1, 1, 0, 0])

    return features, labels
19 |
20 |
def sk_feature_ref():
    """Recursive feature elimination on iris using logistic regression."""
    iris = datasets.load_iris()
    # base classifier used to score attribute subsets
    estimator = LogisticRegression()
    # keep the 3 strongest attributes
    selector = RFE(estimator, 3).fit(iris.data, iris.target)

    # which attributes survived, and the elimination ranking
    print(selector.support_)   # e.g. [False  True  True  True]
    print(selector.ranking_)   # e.g. [2 1 1 1]
    ranked = zip(map(lambda r: round(r, 4), selector.ranking_), iris.feature_names)
    print(sorted(ranked))
    # [(1.0, 'petal length (cm)'), (1.0, 'petal width (cm)'), (1.0, 'sepal width (cm)'), (2.0, 'sepal length (cm)')]
36 |
37 |
def feature_importance():
    """Rank the iris features by ExtraTrees feature_importances_."""
    from sklearn.ensemble import ExtraTreesClassifier

    iris = datasets.load_iris()
    forest = ExtraTreesClassifier()
    forest.fit(iris.data, iris.target)
    rounded = map(lambda v: round(v, 2), forest.feature_importances_)
    print(zip(iris.feature_names, rounded))
    # e.g. [('sepal length (cm)', 0.13), ..., ('petal width (cm)', 0.45)]
46 |
47 |
def sk_feature_ref_v2():
    """RFE on the toy dataset, keeping the 2 strongest of 3 features."""
    features, labels = get_dummy_data()
    names = ['f1', 'f2', 'f3']

    selector = RFE(LogisticRegression(), 2).fit(features, labels)

    print(selector.support_)
    print(selector.ranking_)
    print(sorted(zip(map(lambda r: round(r, 4), selector.ranking_), names)))
60 |
61 |
def test_standard_scaler():
    """Zero-mean / unit-variance scaling of a small sample."""
    from sklearn.preprocessing import StandardScaler
    sample = [-2, -1, 0, 1, 2]
    print(StandardScaler().fit_transform(sample))
    # [-1.414 -0.707  0  0.707  1.414]
67 |
68 |
def test_min_max_scaler():
    """Scale values linearly into [0, 1]."""
    from sklearn.preprocessing import MinMaxScaler
    sample = np.array([0, 1, 2, 3, 4])
    print(MinMaxScaler().fit_transform(sample))
    # [ 0.    0.25  0.5   0.75  1.  ]
74 |
75 |
def test_normalizer():
    """Scale each row to unit L2 norm."""
    from sklearn.preprocessing import Normalizer
    rows = np.array([[3, -1],
                     [-4, 2]])

    print(Normalizer().fit_transform(rows))
    # [[ 0.9486833  -0.31622777]
    #  [-0.89442719  0.4472136 ]]
84 |
85 |
def test_binarizer():
    """Threshold the values: > 2 becomes 1, everything else 0."""
    from sklearn.preprocessing import Binarizer
    sample = np.array([0, 1, 2, 3, 4])
    print(Binarizer(threshold=2).fit_transform(sample))
    # [[0 0 0 1 1]]
91 |
92 |
def test_pearsonr():
    """
    Pearson correlation of two perfectly linearly related sequences;
    both prints should show a correlation of 1.0.
    """
    from scipy.stats import pearsonr
    arr1 = np.arange(0, 12)
    arr2 = np.arange(5, 17)
    print(pearsonr(arr1, arr2))  # (1.0, 0.0)

    # BUG FIX: np.arange(-1, 1, 30) treats 30 as the STEP, yielding a single
    # element, which makes pearsonr meaningless; linspace gives 30 points
    x = np.linspace(-1, 1, 30)
    y = x
    print(pearsonr(x, y))  # (1.0, 0.0)
102 |
103 |
def rfr_feature_select():
    """Score each boston feature alone with a cross-validated random forest."""
    from sklearn.datasets import load_boston
    from sklearn.ensemble import RandomForestRegressor
    from sklearn.cross_validation import cross_val_score, ShuffleSplit

    boston = load_boston()
    X = boston["data"]
    Y = boston["target"]
    names = boston["feature_names"]

    rf = RandomForestRegressor(n_estimators=20, max_depth=4)
    scores = []
    # evaluate the model on each single feature column in turn
    for col in range(X.shape[1]):
        col_score = cross_val_score(rf, X[:, col:col + 1], Y, scoring="r2",
                                    cv=ShuffleSplit(len(X), 3, .3))
        scores.append((round(np.mean(col_score), 3), names[col]))

    # best single-feature predictors first
    print(sorted(scores, reverse=True))
122 |
123 |
if __name__ == '__main__':
    feature_importance()
    # rfr_feature_select()
    # test_pearsonr()
    # test_binarizer()
    # test_normalizer()
    # test_min_max_scaler()
    # test_standard_scaler()
    # sk_feature_ref_v2()
    # sk_feature_ref()
135 |
--------------------------------------------------------------------------------