├── .gitignore ├── wsgi_benchmarking_2018 ├── Bjoern.wsgi ├── uWSGI.sh ├── Gunicorn.sh ├── app.py ├── Gunicorn-meinheld.sh ├── CherryPy.wsgi └── benchmark.sh ├── py2013 ├── setup.py ├── mysqldb-python2.py ├── rename_files.py ├── user_agent_phantomjs.py ├── try_phantomjs.py ├── date_test.py ├── try_pycurl.py ├── jaymodule.c ├── pyexiv2-sample.py ├── python3-mysql.py ├── iplocation.py └── access_log_analysis.py ├── py2015 ├── map_reduce.py ├── fib_generator.py ├── filter_demo.py ├── my.cfg ├── zero_cnt_of_factorial.py ├── special_vars.py ├── gevent_demo.py ├── args_kargs.py ├── try_configparser.py ├── temp-socket.py ├── try_beautifulsoup4.py ├── try_requests.py ├── yanghui_triangle.py ├── mutiprocessing_pool.py ├── threading_local.py ├── mutiprocessing_queue.py ├── taskworker.py ├── taskmanager.py ├── beer.py ├── android_demo.py ├── unified_order_fabfile.py ├── range_xrange.py └── download_repos.py ├── py2018 ├── mean.py ├── try_glob.py ├── proc_start_time.py └── set_check_localtime.py ├── py2014 ├── try_guppy.py ├── urllib2_test.py ├── mem_profile.py ├── string2dict.py ├── network.py ├── closure.py ├── mac_free.py ├── fibonacci.py ├── vps_fabfile.py ├── app_poi.py ├── paxel.py ├── mult_thread_download.py └── poi.py ├── README.md ├── py2016 ├── try_turtle.py ├── try_argparse.py ├── randomMAC.py ├── draw_a_tree_with_turtle.py ├── for_else.py ├── try_collections.py ├── try_except_else_finally.py ├── selenium_capture_safari.py ├── selenium_capture_firefox.py ├── try_docopt.py ├── cleanup_pid.py ├── rpmdb.py └── SimpleHTTPServerWithUpload.py ├── py2011 ├── socket_client.py ├── socket_server.py ├── get_ip_and_location.py ├── VisitBlog.py ├── get_rpms.py └── get_stock_price.py ├── py-libvirt ├── destroy_domains.py └── libvirt-dominfo.py ├── utils └── run_cmd.py ├── py2020 ├── test_decorator.py └── crawler_douban_movie.py ├── py2017 └── mutiprocessing_pool_return_value.py ├── py2021 ├── my_time_it.py └── test_gc.py ├── py2022 ├── remove_item_in_for_list.py ├── ssh_scp.py 
└── ssh_cmd.py ├── leetcode ├── medium │ ├── buy_sell_stock_once_max_profit.py │ └── length_of_longest_substring.py └── easy │ ├── intersection.py │ ├── two_num_sum.py │ ├── reverse_list.py │ ├── remove_duplicates.py │ └── word_pattern.py └── py2024 └── calculate_reserved_mem.py /.gitignore: -------------------------------------------------------------------------------- 1 | .*project 2 | .*classpath 3 | build 4 | *.pyc 5 | *.swp 6 | *.log 7 | .DS_Store 8 | .vscode 9 | -------------------------------------------------------------------------------- /wsgi_benchmarking_2018/Bjoern.wsgi: -------------------------------------------------------------------------------- 1 | import bjoern 2 | from app import application 3 | 4 | bjoern.run( 5 | wsgi_app=application, 6 | host='0.0.0.0', 7 | port=8000, 8 | reuse_port=True 9 | ) 10 | -------------------------------------------------------------------------------- /py2013/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup, Extension 2 | 3 | module1 = Extension('jay', 4 | sources = ['jaymodule.c']) 5 | 6 | setup(name = 'jay', 7 | version = '1.0', 8 | description = 'This is a demo package from Jay.', 9 | ext_modules = [module1]) 10 | -------------------------------------------------------------------------------- /py2015/map_reduce.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # python built-in map()/reduce() exercises. 
3 | 4 | print map(lambda x: x.title(), ['adam', 'LISA', 'barT', 'Jay']) 5 | 6 | 7 | def prod(list1): 8 | return reduce(lambda x, y: x * y, list1) 9 | 10 | 11 | list1 = xrange(1, 6) 12 | print prod(list1) 13 | -------------------------------------------------------------------------------- /py2018/mean.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | 3 | def mean(numbers): 4 | ''' calculate the mean value of a number list ''' 5 | return float(sum(numbers)) / max(len(numbers), 1) 6 | 7 | 8 | if __name__ == '__main__': 9 | list1 = [134, 134, 34.324, 34354, 11.2] 10 | print mean(list1) 11 | print numpy.mean(list1) 12 | -------------------------------------------------------------------------------- /wsgi_benchmarking_2018/uWSGI.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # The official documentation doesn't have any word on this, 3 | # but this seems to be a common practice 4 | PROCESSOR_COUNT=$(nproc) 5 | THREAD_COUNT=2 6 | 7 | uwsgi --http :8300 --plugin python2 --wsgi-file app.py --processes "$PROCESSOR_COUNT" --threads "$THREAD_COUNT" --disable-logging 8 | -------------------------------------------------------------------------------- /py2015/fib_generator.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # make a Fibonacci generator. 
3 | 4 | 5 | def fib(max): 6 | n, a, b = 0, 0, 1 7 | while n < max: 8 | yield b 9 | a, b = b, a + b 10 | n = n + 1 11 | 12 | 13 | if __name__ == '__main__': 14 | fib10 = fib(10) 15 | for i in fib10: 16 | print i 17 | -------------------------------------------------------------------------------- /wsgi_benchmarking_2018/Gunicorn.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | PROCESSOR_COUNT=$(nproc) 4 | # This formula is recommended in the Gunicorn documentation 5 | # http://docs.gunicorn.org/en/stable/design.html#how-many-workers 6 | GUNICORN_WORKER_COUNT=$(( PROCESSOR_COUNT * 2 + 1 )) 7 | 8 | gunicorn -w ${GUNICORN_WORKER_COUNT} -b 0.0.0.0:8200 app:application 9 | -------------------------------------------------------------------------------- /py2015/filter_demo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | # 删除1~100的整数中的素数. 4 | 5 | import math 6 | 7 | 8 | def isNotPrime(n): 9 | flag = False 10 | for i in range(2, int(math.sqrt(n)) + 1): 11 | if n % i == 0: 12 | flag = True 13 | return flag 14 | 15 | for i in filter(isNotPrime, xrange(1, 101)): 16 | print i 17 | -------------------------------------------------------------------------------- /py2014/try_guppy.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | ''' 4 | Created on May 31, 2014 5 | 6 | @author: Jay 7 | 8 | @description: just try to use Guppy-PE (useing Heapy) for memory profiling. 
9 | ''' 10 | 11 | 12 | from guppy import hpy 13 | 14 | a = [8] * (10 ** 6) 15 | 16 | h = hpy() 17 | print h.heap() 18 | print h.heap().more 19 | print h.heap().more.more 20 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | #### This is the README file of python.git in Jay's git repos hosted on github.com 2 | * https://github.com/smilejay/python.git 3 | * git://github.com/smilejay/python.git 4 | 5 | #### My homepage on github.com: https://github.com/smilejay 6 | ```shell 7 | Just for fun. 8 | thanks, 9 | Jay. 10 | ``` 11 | 12 | #### Also, you can contact me via my blog: http://smilejay.com/ 13 | -------------------------------------------------------------------------------- /wsgi_benchmarking_2018/app.py: -------------------------------------------------------------------------------- 1 | def application(environment, start_response): 2 | """ 3 | The main WSGI Application. Doesn't really do anything 4 | since we're benchmarking the servers, not this code :) 5 | """ 6 | 7 | start_response( 8 | '200 OK', # Status 9 | [('Content-type', 'text/plain'), ('Content-Length', '2')] # Headers 10 | ) 11 | return ['OK'] 12 | -------------------------------------------------------------------------------- /py2015/my.cfg: -------------------------------------------------------------------------------- 1 | [mysqld] 2 | datadir=/var/lib/mysql 3 | socket=/var/lib/mysql/mysql.sock 4 | user=mysql 5 | # Disabling symbolic-links is recommended to prevent assorted security risks 6 | symbolic-links=0 7 | 8 | [mysqld_safe] 9 | log-error=/var/log/mysqld.log 10 | pid-file=/var/run/mysqld/mysqld.pid 11 | 12 | [jay_test] 13 | log = /var/log/jay-test.log 14 | user = jay 15 | greeting = 'Hello, %(user)s!' 
16 | -------------------------------------------------------------------------------- /wsgi_benchmarking_2018/Gunicorn-meinheld.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | PROCESSOR_COUNT=$(nproc) 4 | # This formula is recommended in the Gunicorn documentation 5 | # http://docs.gunicorn.org/en/stable/design.html#how-many-workers 6 | GUNICORN_WORKER_COUNT=$(( PROCESSOR_COUNT * 2 + 1 )) 7 | 8 | gunicorn -w ${GUNICORN_WORKER_COUNT} --worker-class="meinheld.gmeinheld.MeinheldWorker" -b 0.0.0.0:8400 app:application 9 | -------------------------------------------------------------------------------- /py2015/zero_cnt_of_factorial.py: -------------------------------------------------------------------------------- 1 | ''' 2 | calculate how many zero(0)s in the end of n!. 3 | ''' 4 | 5 | 6 | def zero_cnt_of_factorial(num=50): 7 | count = 0 8 | i = 5 9 | while ((num / i) >= 1): 10 | count += num/i 11 | i *= 5 12 | return count 13 | 14 | if __name__ == '__main__': 15 | n = int(raw_input('Enter a number: ')) 16 | print "%d! has %d zeros in the end." 
% (n, zero_cnt_of_factorial(n)) 17 | -------------------------------------------------------------------------------- /py2015/special_vars.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | 4 | class Student(object): 5 | 6 | def __init__(self, name, score): 7 | self.__name = name 8 | self.__score = score 9 | 10 | def print_score(self): 11 | print '%s: %s' % (self.__name, self.__score) 12 | 13 | 14 | if __name__ == '__main__': 15 | jay = Student('Jay', 99) 16 | jay.print_score() 17 | #print jay.__name 18 | print jay._Student__name 19 | -------------------------------------------------------------------------------- /py2015/gevent_demo.py: -------------------------------------------------------------------------------- 1 | from gevent import monkey 2 | monkey.patch_all() 3 | import gevent 4 | import urllib2 5 | 6 | 7 | def f(url): 8 | print('GET: %s' % url) 9 | resp = urllib2.urlopen(url) 10 | data = resp.read() 11 | print('%d bytes received from %s.' 
% (len(data), url)) 12 | 13 | gevent.joinall([ 14 | gevent.spawn(f, 'https://www.python.org/'), 15 | gevent.spawn(f, 'http://smilejay.com/'), 16 | gevent.spawn(f, 'http://www.baidu.com/'), 17 | ]) 18 | -------------------------------------------------------------------------------- /py2018/try_glob.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import glob 4 | 5 | 6 | def check_files(): 7 | ''' check files modified in an hour''' 8 | program_list = ['qemu', 'libvirtd'] 9 | the_dir = '/tmp/' 10 | time_delta = 3600 11 | for i in program_list: 12 | for j in glob.glob('%s/*%s*' % (the_dir, i)): 13 | if os.path.getmtime(j) >= time.time() - time_delta: 14 | print 'found file: %s' % j 15 | 16 | 17 | if __name__ == '__main__': 18 | check_files() 19 | -------------------------------------------------------------------------------- /py2015/args_kargs.py: -------------------------------------------------------------------------------- 1 | # *-* encoding=utf-8 *-* 2 | ''' 3 | just try to use *args and **kargs. 
4 | *args表示任何多个无名参数,它是一个tuple;**kwargs表示关键字参数,它是一个dict。并且同时使用*args和**kwargs时,必须*args参数列要在**kwargs前 5 | ''' 6 | 7 | def foo(*args, **kwargs): 8 | print 'args = ', args 9 | print 'kwargs = ', kwargs 10 | print '---------------------------------------' 11 | 12 | if __name__ == '__main__': 13 | foo(1,2,3,4) 14 | foo(a=1,b=2,c=3) 15 | foo(1,2,3,4, a=1,b=2,c=3) 16 | foo('a', 1, None, a=1, b='2', c=3) 17 | -------------------------------------------------------------------------------- /py2016/try_turtle.py: -------------------------------------------------------------------------------- 1 | # *-* coding=utf-8 *-* 2 | import turtle 3 | import time 4 | #定义绘制时画笔的颜色 5 | turtle.color("purple") 6 | #定义绘制时画笔的线条的宽度 7 | turtle.pensize(5) 8 | #定义绘图的速度 9 | turtle.speed(10) 10 | #以0,0为起点进行绘制 11 | turtle.goto(0,0) 12 | #绘出正方形的四条边 13 | for i in range(4): 14 | turtle.forward(100) 15 | turtle.right(90) 16 | #画笔移动到点(-150,-120)时不绘图 17 | turtle.up() 18 | turtle.goto(-150,-120) 19 | #再次定义画笔颜色 20 | turtle.color("red") 21 | #在(-150,-120)点上打印"Done" 22 | turtle.write("Done") 23 | turtle.done() 24 | #time.sleep(10) 25 | -------------------------------------------------------------------------------- /py2011/socket_client.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on 2011-8-14 3 | 4 | @author: Jay Ren 5 | ''' 6 | 7 | import socket 8 | 9 | 10 | def socket_client(): 11 | HOST = '127.0.0.1' # The remote host 12 | PORT = 5007 # The same port as used by the server 13 | s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 14 | s.connect((HOST, PORT)) 15 | s.send(b'Hello, world') 16 | data = s.recv(1024) 17 | s.close() 18 | print('Received', repr(data)) 19 | 20 | if __name__ == '__main__': 21 | socket_client() 22 | -------------------------------------------------------------------------------- /py2015/try_configparser.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | 
import ConfigParser 4 | 5 | config = ConfigParser.ConfigParser() 6 | config.read('my.cfg') 7 | 8 | print config.get('mysqld', 'socket') 9 | print config.get('mysqld_safe', 'pid-file') 10 | print config.get('jay_test', 'greeting', raw=0) 11 | print config.get('jay_test', 'greeting', raw=1) 12 | 13 | config.set('jay_test', 'log', '/var/log/jay-test-new.log') 14 | config.add_section('new_section') 15 | config.set('new_section', 'language', 'Python') 16 | with open('my-new.cfg', 'wb') as configfile: 17 | config.write(configfile) 18 | -------------------------------------------------------------------------------- /wsgi_benchmarking_2018/CherryPy.wsgi: -------------------------------------------------------------------------------- 1 | import socket 2 | try: 3 | from cheroot.wsgi import Server as WSGIServer 4 | except ImportError: 5 | from cherrypy.wsgiserver import CherryPyWSGIServer as WSGIServer 6 | from app import application 7 | 8 | server = WSGIServer( 9 | bind_addr=('0.0.0.0', 8100), 10 | wsgi_app=application, 11 | request_queue_size=500, 12 | server_name=socket.gethostname() 13 | ) 14 | 15 | if __name__ == '__main__': 16 | try: 17 | server.start() 18 | except KeyboardInterrupt: 19 | pass 20 | finally: 21 | server.stop() 22 | -------------------------------------------------------------------------------- /py2015/temp-socket.py: -------------------------------------------------------------------------------- 1 | # use this socket server to debug an issue. 
2 | 3 | import socket 4 | 5 | address = ('127.0.0.1', 8100) 6 | s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 7 | s.bind(address) 8 | s.listen(5) 9 | 10 | while(True): 11 | ss, addr = s.accept() 12 | print 'got connected from', addr 13 | 14 | ss.send('byebye') 15 | ra = ss.recv(512) 16 | print ra 17 | 18 | for i in ra.split('\r\n\r\n'): 19 | print i 20 | 21 | for i in ra.split('\r\n'): 22 | print i.encode('hex') 23 | print i 24 | 25 | ss.close() 26 | 27 | s.close() 28 | -------------------------------------------------------------------------------- /py2016/try_argparse.py: -------------------------------------------------------------------------------- 1 | ''' 2 | just have a try with argparse. 3 | e.g. python try_argparse.py -v -c 10 4 | ''' 5 | import argparse 6 | parser = argparse.ArgumentParser() 7 | parser.add_argument("-v", "--verbose", help="increase output verbosity", 8 | action="store_true") 9 | parser.add_argument("-c", "--copy", help="use * copies") 10 | args = parser.parse_args() 11 | if args.verbose: 12 | print "verbosity turned on" 13 | else: 14 | print "No --verbose arg" 15 | if args.copy: 16 | copies = int(args.copy) 17 | for i in range(0, copies): 18 | print '%d copy' % i 19 | -------------------------------------------------------------------------------- /py2015/try_beautifulsoup4.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | ''' 4 | Beautiful Soup sits atop an HTML or XML parser, providing Pythonic idioms 5 | for iterating, searching, and modifying the parse tree. 
6 | # pip install beautifulsoup4 7 | # pip install requests 8 | ''' 9 | 10 | import requests 11 | from bs4 import BeautifulSoup 12 | 13 | url = 'http://www.baidu.com' 14 | headers = {'User-Agent': 'Mozilla/5.0 (Macintosh) Gecko/20100101 Firefox/38.0'} 15 | request = requests.get(url, headers=headers) 16 | if request.ok: 17 | soup = BeautifulSoup(request.text, 'html.parser') 18 | print soup.title.string 19 | -------------------------------------------------------------------------------- /py2015/try_requests.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | ''' 4 | just try a lib for http request. 5 | # pip install requests 6 | 7 | To void an 'InsecurePlatformWarning': 8 | You only need to install the security package extras. 9 | $ pip install requests[security] 10 | or, install them directly: 11 | $ pip install pyopenssl ndg-httpsclient pyasn1 12 | ''' 13 | 14 | import requests 15 | 16 | url = 'http://www.baidu.com' 17 | headers = {'User-Agent': 'Mozilla/5.0 (Macintosh) Gecko/20100101 Firefox/38.0'} 18 | request = requests.get(url, headers=headers) 19 | if request.ok: 20 | print(request.text) 21 | 22 | -------------------------------------------------------------------------------- /py2016/randomMAC.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # *-* coding:utf-8 *-* 3 | 4 | import random 5 | 6 | 7 | def randomMAC(): 8 | ''' 9 | generate random MAC address. 10 | the first 24 bits are for OUI (Organizationally Unique Identifier). 
11 | OUI是由IEEE的注册管理机构给不同厂家分配的代码,区分了不同的厂家。 12 | ''' 13 | mac = [0x00, 0x8c, 0xfa, 14 | random.randint(0x00, 0xff), 15 | random.randint(0x00, 0xff), 16 | random.randint(0x00, 0xff) 17 | ] 18 | return ':'.join(map(lambda x: "%02x" % x, mac)) 19 | 20 | 21 | if __name__ == '__main__': 22 | print randomMAC() 23 | -------------------------------------------------------------------------------- /wsgi_benchmarking_2018/benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | IP=192.168.122.140 4 | PORTS=(8000 8100 8200 8300 8400) 5 | CONNECTIONS=(100 500 1000 5000 10000) 6 | THREADS=8 7 | DURATION=30 8 | BASE=$1 9 | 10 | ulimit -n 10240 11 | 12 | 13 | function perf() { 14 | echo " Testing with $1 threads and $2 connections ..." 15 | ./wrk --duration $DURATION --threads $1 --connections "$2" "http://$IP:$3" > "$3_$1_$2.log" 16 | } 17 | 18 | for connections in "${CONNECTIONS[@]}"; do 19 | for port in "${PORTS[@]}"; do 20 | perf "$THREADS" "$connections" "$port" 21 | sleep 1 22 | done 23 | done 24 | -------------------------------------------------------------------------------- /py2015/yanghui_triangle.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | 4 | def task(num): 5 | ret = [] 6 | if num < 1: 7 | return None 8 | elif num == 1: 9 | ret = [1] 10 | else: 11 | ret_before = task(num - 1) 12 | for i in (xrange(num)): 13 | if i == 0: 14 | ret.append(1) 15 | elif len(ret_before) <= i: 16 | ret.append(ret_before[i - 1]) 17 | else: 18 | ret.append(ret_before[i - 1] + ret_before[i]) 19 | return ret 20 | 21 | 22 | for i in range(6): 23 | if task(i): 24 | print task(i) 25 | -------------------------------------------------------------------------------- /py2014/urllib2_test.py: -------------------------------------------------------------------------------- 1 | import urllib2 2 | 3 | url = 'http://192.168.211.23:8080/status' 4 | #url = 
'http://192.168.213.142:8080/check/status' 5 | #url = 'hhhp://192.168.213.142:8080/check/status' 6 | req = urllib2.Request(url) 7 | try: 8 | resp = urllib2.urlopen(req, timeout=5) 9 | except urllib2.HTTPError, e: 10 | if e.code == 404: 11 | print 'response code 404' 12 | else: 13 | print 'not 404. response code %d' % e.code 14 | except urllib2.URLError, e: 15 | print 'URLError %s' % e.reason 16 | else: 17 | # 200 18 | print 'http code 200' 19 | body = resp.read() 20 | print body 21 | -------------------------------------------------------------------------------- /py-libvirt/destroy_domains.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import time 3 | import libvirt 4 | 5 | 6 | def destroy_domains(): 7 | ''' 8 | destroy all domains() via libvirt python API. 9 | ''' 10 | conn = libvirt.open(None) 11 | if conn: 12 | for i in conn.listDomainsID(): 13 | dom = conn.lookupByID(i) 14 | dom.destroy() 15 | time.sleep(1) 16 | if conn.listDomainsID(): 17 | print 'ERROR! there are live domains.' 18 | else: 19 | print 'Failed to open connection to the hypervisor' 20 | 21 | 22 | if __name__ == '__main__': 23 | destroy_domains() 24 | -------------------------------------------------------------------------------- /py2013/mysqldb-python2.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python2.7 2 | # coding=utf-8 3 | 4 | import MySQLdb 5 | import sys 6 | 7 | host = 'localhost' 8 | user = 'root' 9 | pwd = '123456' # to be modified. 10 | db = 'test' 11 | 12 | 13 | if __name__ == '__main__': 14 | conn = MySQLdb.connect(host, user, pwd, db, charset='utf8'); 15 | try: 16 | conn.ping() 17 | except: 18 | print 'failed to connect MySQL.' 
19 | sql = 'select * from mytable where id = 2' 20 | cur = conn.cursor() 21 | cur.execute(sql) 22 | row = cur.fetchone() 23 | # print type(row) 24 | for i in row: 25 | print i 26 | cur.close() 27 | conn.close() 28 | sys.exit() 29 | -------------------------------------------------------------------------------- /py2015/mutiprocessing_pool.py: -------------------------------------------------------------------------------- 1 | from multiprocessing import Pool 2 | import os 3 | import time 4 | import random 5 | 6 | 7 | def long_time_task(name): 8 | print 'Run task %s (%s)...' % (name, os.getpid()) 9 | start = time.time() 10 | time.sleep(random.random() * 3) 11 | end = time.time() 12 | print 'Task %s runs %0.2f seconds.' % (name, (end - start)) 13 | 14 | 15 | if __name__ == '__main__': 16 | print 'Parent process %s.' % os.getpid() 17 | p = Pool() 18 | for i in range(5): 19 | p.apply_async(long_time_task, args=(i,)) 20 | print 'Waiting for all subprocesses done...' 21 | p.close() 22 | p.join() 23 | print 'All subprocesses done.' 
24 | -------------------------------------------------------------------------------- /py2011/socket_server.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on 2011-8-14 3 | 4 | @author: Jay Ren 5 | ''' 6 | 7 | import socket 8 | 9 | 10 | def socket_server(): 11 | HOST = '127.0.0.1' # Symbolic name meaning all available interfaces 12 | PORT = 5007 # Arbitrary non-privileged port 13 | s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 14 | s.bind((HOST, PORT)) 15 | s.listen(1) 16 | conn, addr = s.accept() 17 | print('Connected by', addr) 18 | while True: 19 | data = conn.recv(1024) 20 | if not data: 21 | break 22 | conn.send(data) 23 | conn.close() 24 | 25 | if __name__ == '__main__': 26 | socket_server() 27 | -------------------------------------------------------------------------------- /py2018/proc_start_time.py: -------------------------------------------------------------------------------- 1 | ''' 2 | get the start time of a process 3 | e.g. 
python proc_start_time.py -p 12345 4 | ''' 5 | import argparse 6 | import psutil 7 | import os 8 | import time 9 | 10 | 11 | def get_start_time(pid): 12 | ''' return the start time in human readable string ''' 13 | p = psutil.Process(int(pid)) 14 | return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(p.create_time())) 15 | 16 | 17 | if __name__ == '__main__': 18 | parser = argparse.ArgumentParser() 19 | parser.add_argument("-p", "--pid", help="PID of a process") 20 | args = parser.parse_args() 21 | if not args.pid: 22 | args.pid = os.getpid() 23 | print get_start_time(args.pid) 24 | -------------------------------------------------------------------------------- /py2015/threading_local.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # _*_ coding: utf-8 _*_ 3 | 4 | import threading 5 | 6 | # 创建全局ThreadLocal对象: 7 | local_school = threading.local() 8 | 9 | 10 | def process_student(): 11 | print 'Hello, %s (in %s)' % (local_school.student, 12 | threading.current_thread().name) 13 | 14 | 15 | def process_thread(name): 16 | # 绑定ThreadLocal的student: 17 | local_school.student = name 18 | process_student() 19 | 20 | 21 | t1 = threading.Thread(target= process_thread, args=('Alice',), 22 | name='Thread-A') 23 | t2 = threading.Thread(target= process_thread, args=('Bob',), 24 | name='Thread-B') 25 | t1.start() 26 | t2.start() 27 | t1.join() 28 | t2.join() 29 | -------------------------------------------------------------------------------- /py2016/draw_a_tree_with_turtle.py: -------------------------------------------------------------------------------- 1 | from turtle import Turtle 2 | import time 3 | 4 | 5 | def tree(tlist, l, a, f): 6 | if l > 5: 7 | lst = [] 8 | for t in tlist: 9 | t.forward(l) 10 | p = t.clone() 11 | t.left(a) 12 | p.right(a) 13 | lst.append(t) 14 | lst.append(p) 15 | time.sleep(1) 16 | tree(lst, l*f, a, f) 17 | 18 | 19 | def main(): 20 | t = Turtle() 21 | t.color('green') 22 | 
t.pensize(5) 23 | #t.hideturtle() 24 | #t.speed(1) 25 | t.getscreen().tracer(30, 0) 26 | t.left(90) 27 | t.penup() 28 | t.goto(0, -200) 29 | t.pendown() 30 | tree([t], 150, 60, 0.6) 31 | 32 | 33 | if __name__ == '__main__': 34 | main() 35 | -------------------------------------------------------------------------------- /utils/run_cmd.py: -------------------------------------------------------------------------------- 1 | from subprocess import Popen, PIPE, STDOUT 2 | 3 | def shell_output(cmd): 4 | ''' execute a shell command and get its output (stdout/stderr) ''' 5 | p = Popen(cmd, shell=True, stdin=PIPE, stdout=PIPE, stderr=STDOUT) 6 | return p.communicate()[0] 7 | 8 | 9 | def shell_rc_and_output(cmd): 10 | ''' execute a shell command and get its return code and output (stdout/stderr) ''' 11 | p = Popen(cmd, shell=True, stdin=PIPE, stdout=PIPE, stderr=STDOUT) 12 | out = p.communicate()[0] 13 | rc = p.returncode 14 | return rc, out 15 | 16 | 17 | if __name__ == "__main__": 18 | cmd = 'ls -l' 19 | print(shell_output(cmd)) 20 | rc, out = shell_rc_and_output(cmd) 21 | print(rc) 22 | print('rc: {}, out: {}'.format(rc, out)) 23 | -------------------------------------------------------------------------------- /py2013/rename_files.py: -------------------------------------------------------------------------------- 1 | # -*- coding: gb2312 -*- 2 | 3 | ''' 4 | Created on 2013-1-27 5 | 6 | @author: Jay Ren 7 | @module: rename_files 8 | @note: rename files in a Windows system. 
9 | ''' 10 | 11 | import os 12 | import re 13 | 14 | path = "C:\\Users\\yren9\\Documents\\temp" 15 | 16 | def rename_files(): 17 | prefix = "\[电影天堂-www\.dygod\.com\]" 18 | for file in os.listdir(path): 19 | if os.path.isfile(os.path.join(path,file))==True: 20 | if re.match("\[电影天堂-www\.dygod\.com\].+", file): 21 | new_name = re.sub(prefix, "", file) 22 | # print(file) 23 | # print(new_name) 24 | os.rename(os.path.join(path,file),os.path.join(path,new_name)) 25 | 26 | if __name__ == '__main__': 27 | rename_files() 28 | -------------------------------------------------------------------------------- /py2013/user_agent_phantomjs.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on Dec 6, 2013 3 | 4 | @author: Jay 5 | @summary: Set user-agent before using PhantomJS to get a web page. 6 | ''' 7 | 8 | from selenium import webdriver 9 | from selenium.webdriver.common.desired_capabilities import DesiredCapabilities 10 | 11 | dcap = dict(DesiredCapabilities.PHANTOMJS) 12 | dcap["phantomjs.page.settings.userAgent"] = ( 13 | "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:25.0) Gecko/20100101 Firefox/25.0 " 14 | ) 15 | 16 | driver = webdriver.PhantomJS(executable_path='./phantomjs', desired_capabilities=dcap) 17 | driver.get("http://dianping.com/") 18 | cap_dict = driver.desired_capabilities 19 | for key in cap_dict: 20 | print '%s: %s' % (key, cap_dict[key]) 21 | print driver.current_url 22 | driver.quit 23 | -------------------------------------------------------------------------------- /py2013/try_phantomjs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | 4 | ''' 5 | Created on Dec 6, 2013 6 | 7 | @author: Jay 8 | @description: use PhantomJS to parse a web page to get the geo info of an IP 9 | For PhantomJS, please visit: http://phantomjs.org/ 10 | ''' 11 | 12 | from selenium import webdriver 13 | 14 | import sys 15 | 
reload(sys) 16 | sys.setdefaultencoding('utf-8') 17 | 18 | driver = webdriver.PhantomJS(executable_path='./phantomjs') 19 | driver.get("http://www.ip.cn/125.95.26.81") 20 | #print driver.current_url 21 | #print driver.page_source 22 | print driver.find_element_by_id('result').text.split('\n')[0].split('来自:')[1] 23 | #text = driver.find_element_by_xpath('//div[@id="result"]/div/p').text 24 | #result = text.split('来自:')[1] 25 | #print result 26 | driver.quit 27 | -------------------------------------------------------------------------------- /py2020/test_decorator.py: -------------------------------------------------------------------------------- 1 | from functools import wraps 2 | 3 | 4 | ''' useful doc: https://www.runoob.com/w3cnote/python-func-decorators.html ''' 5 | 6 | def logit(logfile='logit.log'): 7 | def logging_decorator(func): 8 | @wraps(func) 9 | def wrapped_function(*args, **kwargs): 10 | log_string = func.__name__ + " was called" 11 | print(log_string) 12 | with open(logfile, 'a') as opened_file: 13 | opened_file.write(log_string + '\n') 14 | return func(*args, **kwargs) 15 | return wrapped_function 16 | return logging_decorator 17 | 18 | 19 | @logit() 20 | def myfunc1(): 21 | pass 22 | 23 | 24 | @logit(logfile='func2.log') 25 | def myfunc2(): 26 | pass 27 | 28 | 29 | if __name__ == '__main__': 30 | myfunc1() 31 | myfunc2() -------------------------------------------------------------------------------- /py2017/mutiprocessing_pool_return_value.py: -------------------------------------------------------------------------------- 1 | from multiprocessing import Pool 2 | import os 3 | import time 4 | import random 5 | 6 | 7 | def exponent(name, x, y): 8 | print 'Run task %s (%s)...' % (name, os.getpid()) 9 | start = time.time() 10 | time.sleep(random.random() * 3) 11 | end = time.time() 12 | print 'Task %s runs %0.2f seconds.' % (name, (end - start)) 13 | return x**y 14 | 15 | 16 | if __name__ == '__main__': 17 | print 'Parent process %s.' 
% os.getpid() 18 | p = Pool(4) 19 | results = [p.apply_async(exponent, args=(i, x, y)) 20 | for (i, x, y) in zip(range(10), range(10), range(10))] 21 | print 'Waiting for all subprocesses done...' 22 | p.close() 23 | p.join() 24 | print 'All subprocesses done.' 25 | output = [r.get() for r in results] 26 | print output 27 | -------------------------------------------------------------------------------- /py2013/date_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | ''' 4 | Created on Sep 20, 2013 5 | @summary: some of the methods to handle date calculation 6 | @author: Jay 7 | ''' 8 | 9 | import datetime 10 | import calendar 11 | 12 | 13 | def get_a_month_val(year, month, day=1): 14 | day = datetime.datetime(year, month, day) 15 | print day 16 | last_day_previous_month = day.replace(day=1) - datetime.timedelta(days=1) 17 | first_day_this_month = day.replace(day=1) 18 | last_day_this_month = day.replace(day=calendar.monthrange(year, month)[1]) 19 | last_day_this_month_1 = day.replace(day=calendar.mdays[day.month]) 20 | print last_day_previous_month 21 | print first_day_this_month 22 | print last_day_this_month 23 | print last_day_this_month_1 24 | 25 | if __name__ == '__main__': 26 | get_a_month_val(2013, 10, 20) 27 | -------------------------------------------------------------------------------- /py2016/for_else.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # test how to use for-else / while-else clause. 3 | # else sub-clause will be executed when there's no break/return/exception in the 4 | # iteration. 5 | 6 | list1 = [1, 2, 3, 4, 5] 7 | list2 = [4, 5, 6, 7, 8] 8 | 9 | for i in list1: 10 | if i > 5: 11 | print 'item is larger than 5; the index in list1 is %d' % list1.index(i) 12 | break 13 | else: 14 | print 'No item in list1 is larger than 5.' 
15 | 16 | for i in list2: 17 | if i > 5: 18 | print 'item is larger than 5; the index in list2 is %d' % list2.index(i) 19 | break 20 | else: 21 | print 'No item in list2 is larger than 5.' 22 | 23 | 24 | i = 0 25 | while i < 10: 26 | if i > 5: 27 | print '%d is larger than 5' % i 28 | break 29 | i += 1 30 | else: 31 | print 'No one is larger than 5' 32 | -------------------------------------------------------------------------------- /py2014/mem_profile.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | ''' 4 | Created on May 31, 2014 5 | 6 | @author: Jay 7 | @description: use memory_profiler module for profiling programs/functions. 8 | ''' 9 | 10 | from memory_profiler import profile 11 | from memory_profiler import memory_usage 12 | import time 13 | 14 | 15 | @profile 16 | def my_func(): 17 | a = [1] * (10 ** 6) 18 | b = [2] * (2 * 10 ** 7) 19 | del b 20 | return a 21 | 22 | 23 | def cur_python_mem(): 24 | mem_usage = memory_usage(-1, interval=0.2, timeout=1) 25 | return mem_usage 26 | 27 | 28 | def f(a, n=100): 29 | time.sleep(1) 30 | b = [a] * n 31 | time.sleep(1) 32 | return b 33 | 34 | if __name__ == '__main__': 35 | a = my_func() 36 | print cur_python_mem() 37 | print "" 38 | print memory_usage((f, (1,), {'n': int(1e6)}), interval=0.5) 39 | -------------------------------------------------------------------------------- /py2016/try_collections.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from collections import defaultdict 4 | 5 | colors = ['red', 'green', 'white', 'red', 'blue', 'red'] 6 | 7 | d = {} 8 | for c in colors: 9 | if c not in d: 10 | d[c] = 0 11 | d[c] += 1 12 | print(d) 13 | 14 | d1 = {} 15 | for c in colors: 16 | d1[c] = d1.get(c, 0) + 1 17 | print(d1) 18 | 19 | d2 = defaultdict(int) 20 | for c in colors: 21 | d2[c] += 1 22 | print(d2) 23 | 24 | 25 | from collections import OrderedDict 26 | od = 
OrderedDict() 27 | od['z'] = 1 28 | od['y'] = 2 29 | od['x'] = 3 30 | print(od.keys()) # 按照插入的Key的顺序返回 31 | 32 | 33 | from collections import deque 34 | 35 | # 双端队列 36 | q = deque(['a', 'b', 'c']) 37 | q.append('x') 38 | q.appendleft('y') 39 | print(q) 40 | 41 | 42 | from collections import Counter 43 | c = Counter() 44 | for ch in 'helloworld': 45 | c[ch] = c[ch] + 1 46 | print(c) -------------------------------------------------------------------------------- /py2015/mutiprocessing_queue.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # _*_ coding:utf-8 _*_ 3 | 4 | from multiprocessing import Process, Queue 5 | import time 6 | import random 7 | 8 | 9 | # 写数据进程执行的代码: 10 | def write(q): 11 | for value in ['A', 'B', 'C']: 12 | print 'Put %s to queue...' % value 13 | q.put(value) 14 | time.sleep(random.random()) 15 | 16 | 17 | # 读数据进程执行的代码: 18 | def read(q): 19 | while True: 20 | value = q.get(True) 21 | print 'Get %s from queue.' 
% value 22 | 23 | 24 | if __name__ == '__main__': 25 | # 父进程创建Queue,并传给各个子进程: 26 | queue = Queue() 27 | p_write = Process(target=write, args=(queue,)) 28 | p_read = Process(target=read, args=(queue,)) 29 | # 启动子进程pw,写入: 30 | p_write.start() 31 | # 启动子进程pr,读取: 32 | p_read.start() 33 | # 等待pw结束: 34 | p_write.join() 35 | # pr进程里是死循环,无法等待其结束,只能强行终止: 36 | p_read.terminate() 37 | -------------------------------------------------------------------------------- /py2014/string2dict.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on Oct 14, 2014 3 | 4 | @author: Jay 5 | ''' 6 | 7 | import MySQLdb 8 | import ast 9 | import json 10 | 11 | 12 | def my_run(): 13 | try: 14 | s = '{"host":"10.1.77.20", "port":3306, "user":"abc",\ 15 | "passwd":"123", "db":"mydb", "connect_timeout":10}' 16 | d = ast.literal_eval(s) 17 | print type(d) 18 | print d 19 | d1 = eval(s) 20 | print type(d1) 21 | print d1 22 | d2 = json.loads(s) 23 | print type(d2) 24 | print d2 25 | MySQLdb.Connect(host=d['host'], port=d['port'], user=d['user'], 26 | passwd=d['passwd'], db=d['db'], 27 | connect_timeout=d['connect_timeout']) 28 | print 'right' 29 | except Exception, e: 30 | print 'wrong %s' % e 31 | 32 | 33 | if __name__ == '__main__': 34 | my_run() 35 | -------------------------------------------------------------------------------- /py2013/try_pycurl.py: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | ''' 5 | Created on Dec 15, 2013 6 | 7 | @author: Jay 8 | ''' 9 | 10 | import sys 11 | import pycurl 12 | import time 13 | 14 | class Test: 15 | def __init__(self): 16 | self.contents = '' 17 | 18 | def body_callback(self, buf): 19 | self.contents = self.contents + buf 20 | 21 | sys.stderr.write("Testing %s\n" % pycurl.version) 22 | 23 | start_time = time.time() 24 | 25 | url = 'http://www.dianping.com/shanghai' 26 | t = Test() 27 | c = pycurl.Curl() 28 | c.setopt(c.URL, url) 29 | c.setopt(c.WRITEFUNCTION, t.body_callback) 30 | c.perform() 31 | end_time = time.time() 32 | duration = end_time - start_time 33 | print c.getinfo(pycurl.HTTP_CODE), c.getinfo(pycurl.EFFECTIVE_URL) 34 | c.close() 35 | 36 | print 'pycurl takes %s seconds to get %s ' % (duration, url) 37 | 38 | print 'lenth of the content is %d' % len(t.contents) 39 | #print(t.contents) 40 | -------------------------------------------------------------------------------- /py2021/my_time_it.py: -------------------------------------------------------------------------------- 1 | # *_* coding=utf-8 *_* 2 | 3 | import time 4 | import functools 5 | 6 | 7 | # 装饰器 用于打印函数执行耗时 性能分析很有用 8 | # 这是为python2 写的; python3中 不要使用time.clock()了 9 | # DeprecationWarning: time.clock has been deprecated in Python 3.3 and will be removed from Python 3.8: use time.perf_counter or time.process_time instead 10 | def time_it(func): 11 | @functools.wraps(func) 12 | def _wrapper(*args, **kwargs): 13 | # start = time.clock() 14 | start = time.time() 15 | func(*args, **kwargs) 16 | # end = time.clock() 17 | end = time.time() 18 | print("function %s() costs %s second(s)" % (func.__name__, end - start)) 19 | return _wrapper 20 | 21 | 22 | @time_it 23 | def test1(x, y): 24 | time.sleep(1) 25 | return x + y 26 | 27 | 28 | @time_it 29 | def test2(x, y): 30 | time.sleep(3) 31 | return x + y 32 | 33 | if __name__ == '__main__': 34 | test1(3, 5) 35 | test2(3, 5) 36 | 
import os
import gc
import time
import psutil


def print_memory_info():
    """Print the current process's USS memory usage in MB (uses psutil)."""
    pid = os.getpid()
    p = psutil.Process(pid)

    info = p.memory_full_info()
    MB = 1024 * 1024
    memory = info.uss / MB
    print('used %d MB' % memory)


def test_func():
    """Allocate a big list with no external references.

    Reference counting frees it as soon as the function returns.
    """
    print("test start")
    print_memory_info()
    length = 1000 * 1000
    nums = [i for i in range(length)]  # renamed: don't shadow builtin `list`
    print_memory_info()


def test1_func():
    """Build two big lists that reference each other (a reference cycle).

    After this returns, the cycle is unreachable but cannot be freed by
    reference counting alone; the gc.collect() call below reclaims it.
    """
    print("test1 start")
    print_memory_info()
    length = 1000 * 1000
    list_a = [i for i in range(length)]
    list_b = [i for i in range(length)]
    list_a.append(list_b)
    list_b.append(list_a)
    print_memory_info()
    # Bug fix: the original `return list` returned the *builtin type*
    # `list` (a leftover typo). Return the head of the cycle instead;
    # the caller ignores it, so the cycle still becomes garbage.
    return list_a


test_func()
print_memory_info()
test1_func()
print_memory_info()
time.sleep(10)
print_memory_info()
gc.collect()
print_memory_info()
def ip_validation(ip):
    '''
    Return True when *ip* parses as an IPv4 address, False otherwise.

    Validation is delegated to socket.inet_aton(), so the shorthand
    dotted forms it accepts (e.g. '127.1') also count as valid.
    '''
    try:
        socket.inet_aton(ip)
    except socket.error:
        return False
    return True
25 | ''' 26 | if not ip_validation(ip): 27 | return False 28 | sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 29 | sock.settimeout(2) 30 | result = sock.connect_ex((ip, port)) 31 | if result == 0: 32 | return True 33 | else: 34 | return False 35 | 36 | 37 | if __name__ == '__main__': 38 | ip = '192.168.213.11' 39 | port = 90 40 | print ip_validation(ip) 41 | print connection_validation(ip, port) 42 | -------------------------------------------------------------------------------- /py2022/remove_item_in_for_list.py: -------------------------------------------------------------------------------- 1 | list1 = [1, 2, 3, 4, 5, 2, 2] 2 | for i in list1: 3 | if i == 2: 4 | list1.remove(i) 5 | print(list1) 6 | 7 | 8 | # list1 = [1, 2, 3, 4, 5, 2, 2] 9 | # for i in range(len(list1)): 10 | # if list1[i] == 2: 11 | # list1.remove(list1[i]) 12 | # IndexError: list index out of range 13 | # print(list1) 14 | 15 | list1 = [1, 2, 3, 4, 5, 2, 2] 16 | list2 = list(filter(lambda x: x!=2, list1)) 17 | print(list2) 18 | 19 | list1 = [1, 2, 3, 4, 5, 2, 2] 20 | list2 = [i for i in list1 if i!=2] 21 | print(list2) 22 | 23 | list1 = [1, 2, 3, 4, 5, 2, 2] 24 | for i in list1[:]: 25 | if i == 2: 26 | list1.remove(i) 27 | print(list1) 28 | 29 | import copy 30 | 31 | list1 = [1, 2, 3, 4, 5, 2, 2] 32 | # list2 = copy.copy(list1) 33 | list2 = copy.deepcopy(list1) 34 | for i in list1: 35 | if i == 2: 36 | list2.remove(i) 37 | print(list2) 38 | 39 | list1 = [1, 2, 3, 4, 5, 2, 2] 40 | while 2 in list1: 41 | list1.remove(2) 42 | print(list1) 43 | -------------------------------------------------------------------------------- /leetcode/medium/buy_sell_stock_once_max_profit.py: -------------------------------------------------------------------------------- 1 | # -*- coding:UTF-8 -*- 2 | ''' 3 | 给定一个数组 prices ,它的第 i 个元素 prices[i] 表示一支给定股票第 i 天的价格。 4 | 你只能选择 某一天 买入这只股票,并选择在 未来的某一个不同的日子 卖出该股票。设计一个算法来计算你所能获取的最大利润。 5 | 返回你可以从这笔交易中获取的最大利润。如果你不能获取任何利润,返回 0 。 6 | 7 | 思路: 8 | 
class Solution:
    def maxProfit(self, prices):
        """Best profit from one buy followed by one later sell (0 if none).

        Single left-to-right pass: remember the cheapest price seen so
        far, and the best profit achievable by selling at today's price.
        """
        lowest = float('inf')
        best = 0
        for p in prices:
            if p < lowest:
                lowest = p          # new historic low: a better buy day
            elif p - lowest > best:
                best = p - lowest   # selling today beats the record
        return best
% (n, n)) 32 | r = '%d * %d = %d' % (n, n, n * n) 33 | time.sleep(1) 34 | result.put(r) 35 | except Queue.Empty: 36 | print('task queue is empty.') 37 | # 处理结束: 38 | print('worker exit.') 39 | -------------------------------------------------------------------------------- /py2015/taskmanager.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # _*_ encoding: utf-8 _*_ 3 | 4 | # taskmanager.py 5 | 6 | import random 7 | import Queue 8 | from multiprocessing.managers import BaseManager 9 | 10 | # 发送任务的队列: 11 | task_queue = Queue.Queue() 12 | # 接收结果的队列: 13 | result_queue = Queue.Queue() 14 | 15 | 16 | # 从BaseManager继承的QueueManager: 17 | class QueueManager(BaseManager): 18 | pass 19 | 20 | # 把两个Queue都注册到网络上, callable参数关联了Queue对象: 21 | QueueManager.register('get_task_queue', callable=lambda: task_queue) 22 | QueueManager.register('get_result_queue', callable=lambda: result_queue) 23 | # 绑定端口5000, 设置验证码'abc': 24 | manager = QueueManager(address=('', 5000), authkey='abc') 25 | # 启动Queue: 26 | manager.start() 27 | # 获得通过网络访问的Queue对象: 28 | task = manager.get_task_queue() 29 | result = manager.get_result_queue() 30 | # 放几个任务进去: 31 | for i in range(10): 32 | n = random.randint(0, 10000) 33 | print('Put task %d...' 
% n) 34 | task.put(n) 35 | # 从result队列读取结果: 36 | print('Try get results...') 37 | for i in range(10): 38 | r = result.get(timeout=10) 39 | print('Result: %s' % r) 40 | # 关闭: 41 | manager.shutdown() 42 | -------------------------------------------------------------------------------- /py2016/try_except_else_finally.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import math 3 | 4 | print '====================================' 5 | try: 6 | math.sqrt(-1) 7 | except ValueError, e: 8 | print 'ValueError: %s' % e 9 | except AttributeError, e: 10 | print 'AttributeError: %s' % e 11 | except: 12 | print 'Other Exception: %s' % e 13 | else: 14 | print 'No error found' 15 | finally: 16 | print 'Finally. #1\n' 17 | 18 | 19 | print '====================================' 20 | try: 21 | None.get('a') 22 | except ValueError, e: 23 | print 'ValueError: %s' % e 24 | except AttributeError, e: 25 | print 'AttributeError: %s' % e 26 | except: 27 | print 'Other Exception: %s' % e 28 | else: 29 | print 'No error found' 30 | finally: 31 | print 'Finally. #2\n' 32 | 33 | 34 | print '====================================' 35 | try: 36 | 1 + 1 37 | except ValueError, e: 38 | print 'ValueError: %s' % e 39 | except AttributeError, e: 40 | print 'AttributeError: %s' % e 41 | except: 42 | print 'Other Exception: %s' % e 43 | else: 44 | print 'No error found' 45 | finally: 46 | print 'Finally. 
#3\n' 47 | -------------------------------------------------------------------------------- /py2016/selenium_capture_safari.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from selenium import webdriver 4 | import time 5 | 6 | 7 | def capture(url, img_file="test1.png"): 8 | safari = webdriver.Safari() 9 | safari.set_window_size(1200, 900) 10 | safari.get(url) 11 | safari.execute_script(""" 12 | (function () { 13 | var y = 0; 14 | var step = 100; 15 | window.scroll(0, 0); 16 | 17 | function f() { 18 | if (y < document.body.scrollHeight) { 19 | y += step; 20 | window.scroll(0, y); 21 | setTimeout(f, 50); 22 | } else { 23 | window.scroll(0, 0); 24 | document.title += "scroll-done"; 25 | } 26 | } 27 | 28 | setTimeout(f, 1000); 29 | })(); 30 | """) 31 | 32 | for i in xrange(30): 33 | if "scroll-done" in safari.title: 34 | break 35 | time.sleep(1) 36 | 37 | safari.save_screenshot(img_file) 38 | safari.close() 39 | 40 | 41 | if __name__ == "__main__": 42 | capture("http://smilejay.com/") 43 | -------------------------------------------------------------------------------- /py2016/selenium_capture_firefox.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from selenium import webdriver 4 | import time 5 | 6 | 7 | def capture(url, img_file="test1.png"): 8 | firefox = webdriver.Firefox() 9 | firefox.set_window_size(1200, 900) 10 | firefox.get(url) 11 | firefox.execute_script(""" 12 | (function () { 13 | var y = 0; 14 | var step = 100; 15 | window.scroll(0, 0); 16 | 17 | function f() { 18 | if (y < document.body.scrollHeight) { 19 | y += step; 20 | window.scroll(0, y); 21 | setTimeout(f, 50); 22 | } else { 23 | window.scroll(0, 0); 24 | document.title += "scroll-done"; 25 | } 26 | } 27 | 28 | setTimeout(f, 1000); 29 | })(); 30 | """) 31 | 32 | for i in xrange(30): 33 | if "scroll-done" in firefox.title: 34 | break 35 | time.sleep(1) 36 
| 37 | firefox.save_screenshot(img_file) 38 | firefox.close() 39 | 40 | 41 | if __name__ == "__main__": 42 | capture("http://www.taobao.com") 43 | -------------------------------------------------------------------------------- /py2013/pyexiv2-sample.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python2.7 2 | # just a sample for using python-pyexiv2 Library 3 | # pyexiv2 is a Python binding to exiv2, the C++ library for manipulation of EXIF, IPTC and XMP image metadata. 4 | 5 | import pyexiv2 6 | import os 7 | 8 | path = '/home/master/Pictures/wall-paper' 9 | for file in os.listdir(path): 10 | print 'file:' + os.path.join(path,file) 11 | print '--------------------------------------------------------------' 12 | metadata = pyexiv2.ImageMetadata(os.path.join(path,file)) 13 | metadata.read() 14 | print metadata['Exif.Image.DateTime'].value.strftime('%A %d %B %Y, %H:%M:%S') 15 | print metadata['Exif.Image.ImageDescription'].value 16 | print metadata['Exif.Image.Software'].value 17 | print metadata['Exif.Image.ExifTag'].value 18 | key = 'Exif.Photo.UserComment' 19 | value = 'A comment.' 20 | metadata[key] = pyexiv2.ExifTag(key, value) 21 | # metadata[key] = value # this a shotcut method as the previous line. 22 | metadata.write() 23 | print metadata[key].value 24 | metadata[key].value ='A new comment.' 
class Solution(object):
    def intersection(self, nums1, nums2):
        """
        :type nums1: List[int]
        :type nums2: List[int]
        :rtype: List[int]
        """
        # Sets are hash-based, so membership tests average O(1); building
        # both sets and probing costs O(n+m) time and O(n+m) extra space,
        # and duplicates collapse automatically (output elements unique).
        s1, s2 = set(nums1), set(nums2)
        # Walk the smaller set and probe the larger one — fewer O(1)
        # lookups. Ties walk s1, matching the original implementation.
        if len(s1) <= len(s2):
            smaller, larger = s1, s2
        else:
            smaller, larger = s2, s1
        return [v for v in smaller if v in larger]
26 | rssTotal += rss 27 | 28 | # Process vm_stat 29 | vmLines = vm.split('\n') 30 | sep = re.compile(':[\s]+') 31 | vmStats = {} 32 | for row in range(1,len(vmLines)-2): 33 | rowText = vmLines[row].strip() 34 | rowElements = sep.split(rowText) 35 | vmStats[(rowElements[0])] = int(rowElements[1].strip('\.')) * 4096 36 | 37 | print 'Wired Memory:\t\t%d MB' % ( vmStats["Pages wired down"]/1024/1024 ) 38 | print 'Active Memory:\t\t%d MB' % ( vmStats["Pages active"]/1024/1024 ) 39 | print 'Inactive Memory:\t%d MB' % ( vmStats["Pages inactive"]/1024/1024 ) 40 | print 'Free Memory:\t\t%d MB' % ( vmStats["Pages free"]/1024/1024 ) 41 | print 'Real Mem Total (ps):\t%.3f MB' % ( rssTotal/1024/1024 ) -------------------------------------------------------------------------------- /py2011/get_ip_and_location.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | 4 | ''' 5 | Created on 2011-8-15 6 | 7 | @author: Jay Ren 笑遍世界 8 | ''' 9 | 10 | import re 11 | import urllib.request 12 | 13 | 14 | def get_reponse_from_url(url): 15 | req = urllib.request.Request(url) 16 | encoding = 'gbk' 17 | try: 18 | doc = urllib.request.urlopen(req).read() 19 | # print(doc.decode(encoding)) 20 | return doc.decode(encoding) 21 | except Exception as e: 22 | print("urlopen Exception : %s" % e) 23 | 24 | 25 | def get_ip_and_location(): 26 | url_ip_qq = "http://fw.qq.com/ipaddress" 27 | url_location_youdao = "http://www.youdao.com/smartresult-xml/search.s?type=ip&q=" 28 | re_ip = "((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))" 29 | str_ip = get_reponse_from_url(url_ip_qq) 30 | ip = re.search(re_ip, str_ip).group(1) 31 | # print("ip="+ip.group(1)) 32 | print("your ip is:"+ip) 33 | url_location_youdao += ip 34 | str_location = get_reponse_from_url(url_location_youdao) 35 | re_location = '(.*)' 36 | location 
def bottles_cnt_beer(money=10):
    '''
    How many bottles of beer can you drink with *money* RMB?
    Price: 2 RMB/bottle; 3 empties trade for 1 full bottle; and you may
    borrow one empty from the shop (drink, then hand the empty back).
    Reference answers:
    10 -> 7
    100 -> 75
    1234 -> 925
    12345 -> 9258
    '''
    price = 2
    trade_in = 3  # empties needed for one free bottle
    total = money // price
    empties = total
    while empties >= trade_in:
        free_beers = empties // trade_in
        total += free_beers
        empties = free_beers + empties % trade_in
    # Holding trade_in-1 empties: borrow one, drink, return the empty.
    if empties == trade_in - 1:
        total += 1
    return total
import datetime


def fib1(n):
    ''' Plain recursion: exponential time, for comparison only. '''
    if n == 0:
        return 0
    if n == 1:
        return 1
    return fib1(n - 1) + fib1(n - 2)


# shared memo table for fib2 (persists across calls)
known = {0: 0, 1: 1}


def fib2(n):
    ''' Recursion memoized through the module-level `known` dict. '''
    if n not in known:
        known[n] = fib2(n - 1) + fib2(n - 2)
    return known[n]


def fib3(n):
    ''' Iterative, constant extra space (returns None for negative n). '''
    if n == 0:
        return 0
    if n == 1:
        return 1
    if n >= 2:
        prev, curr = 0, 1
        for _ in range(2, n + 1):
            prev, curr = curr, prev + curr
        return curr


def fib4(n):
    ''' Bottom-up table: stores every Fibonacci number up to n. '''
    table = [0, 1]
    for i in range(2, n + 1):
        table.append(table[i - 1] + table[i - 2])
    return table[n]


if __name__ == '__main__':
    n = 40
    print(datetime.datetime.now())
    print('fib1(%d)=%d' % (n, fib1(n)))
    print(datetime.datetime.now())
    print('fib2(%d)=%d' % (n, fib2(n)))
    print(datetime.datetime.now())
    print('fib3(%d)=%d' % (n, fib3(n)))
    print(datetime.datetime.now())
    print('fib4(%d)=%d' % (n, fib4(n)))
    print(datetime.datetime.now())
20 | desired_caps = {} 21 | desired_caps['platformName'] = 'Android' 22 | desired_caps['platformVersion'] = '4.4' 23 | desired_caps['deviceName'] = 'emulator-5554' 24 | desired_caps['autoLaunch'] = 'true' 25 | # desired_caps['automationName'] = "selendroid" 26 | desired_caps['app'] = PATH( 27 | 'apps/Nova_7.2.0_debug.apk' 28 | ) 29 | desired_caps['appPackage'] = 'com.dianping.v1' 30 | desired_caps[ 31 | 'appActivity'] = 'com.dianping.main.guide.SplashScreenActivity' 32 | 33 | self.driver = webdriver.Remote( 34 | 'http://localhost:4723/wd/hub', desired_caps) 35 | 36 | def tearDown(self): 37 | self.driver.quit() 38 | 39 | def test_dpApp(self): 40 | time.sleep(10) 41 | el = self.driver.find_element_by_xpath( 42 | "//android.widget.TextView[contains(@text,'美食')]") 43 | el.click() 44 | 45 | 46 | if __name__ == '__main__': 47 | suite = unittest.TestLoader().loadTestsFromTestCase(DpAppTests) 48 | unittest.TextTestRunner(verbosity=2).run(suite) 49 | -------------------------------------------------------------------------------- /py2014/vps_fabfile.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # use Fabric to manage all the hosts in perf env. 3 | # usage: fab -f vps_fabfile.py download_backup 4 | # author: Jay 5 | 6 | from fabric.context_managers import cd 7 | #from fabric.context_managers import settings 8 | from fabric.operations import * 9 | from fabric.api import * 10 | from datetime import datetime 11 | 12 | env.hosts = 'smilejay.com' 13 | env.port = 22 14 | env.user = 'root' 15 | env.password = '1234' 16 | 17 | 18 | @task 19 | def put_sshkey(): 20 | # add ssh public key of the master to remote slaves. 
21 | with cd('/tmp'): 22 | put('id_rsa.pub.master', 'id_rsa.pub.master') 23 | put('add_sshkey.sh', 'add_sshkey.sh') 24 | run('bash add_sshkey.sh id_rsa.pub.master') 25 | 26 | 27 | @task 28 | def download_backup(): 29 | # backup my WP file and database, download them to the local machine 30 | dt = datetime.now().strftime("%Y-%m-%d-%H-%M-%S") 31 | local_dir = '/home/jay/backup' 32 | with cd('/tmp'): 33 | nginx = '/usr/share/nginx' 34 | wp_root = '/usr/share/nginx/html' 35 | exclude = 'html/wp-content/cache' 36 | bk_name = 'wp_%s.tar.gz' % dt 37 | clean = 'rm -f wp*.tar.gz' 38 | mysql = 'mysqldump -uroot -p1234 -A > %s/mysql-dump.sql' % wp_root 39 | tar = 'tar -zcf %s -C %s html --exclude=%s' % (bk_name, nginx, exclude) 40 | run(clean) 41 | run(mysql) 42 | run(tar) 43 | get(bk_name, '%s/%s' % (local_dir, bk_name)) 44 | -------------------------------------------------------------------------------- /leetcode/easy/two_num_sum.py: -------------------------------------------------------------------------------- 1 | # *_* coding=utf-8 *_* 2 | 3 | ''' 4 | 给定一个整数数组 nums 和一个整数目标值 target,请你在该数组中找出 和为目标值 的那 两个 整数,并返回它们的数组下标。 5 | 你可以假设每种输入只会对应一个答案。但是,数组中同一个元素在答案里不能重复出现。 6 | 你可以按任意顺序返回答案。 7 | 8 | 链接:https://leetcode-cn.com/problems/two-sum 9 | 10 | 1. 暴力破解:时间复杂度 O(N^2) 空间复杂度 O(1) 11 | 2. 
哈希表:时间复杂度 O(N) 空间复杂度 O(N) 12 | 13 | ''' 14 | 15 | class Solution(object): 16 | def twoSum(self, nums, target): 17 | """ 18 | :type nums: List[int] 19 | :type target: int 20 | :rtype: List[int] 21 | """ 22 | n = len(nums) 23 | for i in range(n): 24 | for j in range(i + 1, n): 25 | if nums[i] + nums[j] == target: 26 | return [i, j] 27 | return ['not-found', 'not-found'] 28 | 29 | def twoSum_1(self, nums, target): 30 | hashtable = dict() 31 | for i, num in enumerate(nums): 32 | if target - num in hashtable: 33 | return [hashtable[target - num], i] 34 | hashtable[nums[i]] = i 35 | return ['not-found', 'not-found'] 36 | 37 | 38 | if __name__ == '__main__': 39 | num_list = [1, 7, 9, 4, 53, 42] 40 | sum = 62 41 | s = Solution() 42 | print(s.twoSum(num_list, sum)) 43 | print(s.twoSum_1(num_list, sum)) 44 | num_list = [3, 2, 4] 45 | sum = 6 46 | print(s.twoSum(num_list, sum)) 47 | print(s.twoSum_1(num_list, sum)) 48 | num_list = [3, 3] 49 | sum = 6 50 | print(s.twoSum(num_list, sum)) 51 | print(s.twoSum_1(num_list, sum)) 52 | -------------------------------------------------------------------------------- /py2011/VisitBlog.py: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/python3 2 | # -*- coding: utf-8 -*- 3 | 4 | ''' 5 | Created on 2011-04-06 6 | @description: use multithreading to visist some of my blog pages 7 | @author: http://smilejay.com/ 8 | ''' 9 | 10 | import sys 11 | import threading 12 | import urllib.request 13 | 14 | urls = ['kvm_theory_practice/', 15 | 'about/', 16 | 'i_will_laugh_at_the_world/' 17 | ] 18 | 19 | visitTimesPerPage = 10 20 | 21 | 22 | def usage(): 23 | print('Usage:', sys.argv[0], 'host') 24 | 25 | 26 | def main(argv): 27 | host = argv[1] 28 | if host == '': 29 | usage() 30 | sys.exit(2) 31 | else: 32 | for i in range(visitTimesPerPage): 33 | for url in urls: 34 | visitPageThread = VisitPageThread(url + str(i), host, url) 35 | visitPageThread.start() 36 | 37 | 38 | class VisitPageThread(threading.Thread): 39 | 40 | def __init__(self, threadName, host, url): 41 | threading.Thread.__init__(self, name=threadName) 42 | self.host = host 43 | self.url = url 44 | 45 | def run(self): 46 | url = self.host + self.url 47 | req = urllib.request.Request(url) 48 | req.set_proxy('companyname.com:911', 'http') 49 | # you may set you proxy here. 
#!/usr/bin/env python
"""Just try docopt lib for python

Usage:
    try_docopt.py (-h | --help)
    try_docopt.py [options]

Examples:
    try_docopt.py -s +ts5,-ts2 -c +tc5,-tc3

Options:
    -h, --help
    -s, --testsuite suites    #add/remove some testsuites
    -c, --testcase cases      #add/remove some testcases

"""

from docopt import docopt

# default selections, edited in place by add_remove()
testsuites = ['ts1', 'ts2', 'ts3', 'ts4']
testcases = ['tc1', 'tc2', 'tc3', 'tc4']


def add_remove(tlist, opt_list):
    '''
    add/remove item in tlist.
    opt_list is a list like ['+ts5', '-ts2'] or ['+tc5', '-tc3'].

    Returns the (mutated) tlist on success, or the int 1 when any item was
    malformed or removed an unknown entry.  NOTE(review): the mixed
    list-or-int return forces callers to compare against 1 (see __main__).
    '''
    flag = 0
    for i in opt_list:
        i = i.strip()
        if i.startswith('+'):
            # '+name' appends unconditionally (duplicates are possible)
            tlist.append(i[1:])
        elif i.startswith('-'):
            if i[1:] in tlist:
                tlist.remove(i[1:])
            else:
                print 'bad argument: %s is not in %s' % (i[1:], tlist)
                flag = 1
        else:
            print 'bad argument: %s' % i
            flag = 1
    if flag:
        return flag
    else:
        return tlist

if __name__ == '__main__':
    # docopt builds the options dict by parsing the module docstring above,
    # so that docstring is part of the program's behaviour.
    args = docopt(__doc__)
    ts_arg = args.get('--testsuite')
    tc_arg = args.get('--testcase')
    if ts_arg:
        ts_opt_list = ts_arg.strip().split(',')
        testsuites = add_remove(testsuites, ts_opt_list)
    if tc_arg:
        tc_opt_list = tc_arg.strip().split(',')
        testcases = add_remove(testcases, tc_opt_list)
    # add_remove returns 1 on error, so only print when both succeeded
    if testsuites != 1 and testcases != 1:
        print 'ts: %s' % testsuites
        print 'tc: %s' % testcases
each_movie.find_all('li') 27 | 28 | movie_names.append(all_a_tag[1].text) 29 | urls_to_fetch.append(all_a_tag[1]['href']) 30 | movie_dates.append(all_li_tag[0].text) 31 | 32 | tasks = [fetch_content(url) for url in urls_to_fetch] 33 | pages = await asyncio.gather(*tasks) 34 | 35 | for movie_name, movie_date, page in zip(movie_names, movie_dates, pages): 36 | soup_item = BeautifulSoup(page, 'lxml') 37 | img_tag = soup_item.find('img') 38 | 39 | print('{} {} {}'.format(movie_name, movie_date, img_tag['src'])) 40 | 41 | 42 | if '__name__' == '__main__': 43 | start = time.time() 44 | print('start: {}'.format(start)) 45 | asyncio.run(main()) 46 | end = time.time() 47 | print('end: {}'.format(end)) 48 | print('it took {} seconds.'.format(end - start)) 49 | -------------------------------------------------------------------------------- /py2015/unified_order_fabfile.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # a fabfile to manange the performance test for unified order project. 
3 | # usage: fab -f unified_order_fabfile.py start_jmeter -P -z 30 4 | # author: Jay 5 | 6 | from fabric.context_managers import cd 7 | from fabric.operations import run, put 8 | from fabric.api import task, env 9 | 10 | env.hosts = ['192.168.1.2', '192.168.1.3', '192.168.1.4'] 11 | env.port = 22 12 | env.user = 'root' 13 | env.password = '123456' 14 | 15 | 16 | @task 17 | def hostname(): 18 | # show hostname # just for testing 19 | with cd('/tmp'): 20 | run('hostname') 21 | 22 | 23 | @task 24 | def copy_jmeter(): 25 | # copy jmeter to other machines 26 | with cd('/tmp'): 27 | run('rm -rf jakarta-jmeter-2.3.4') 28 | put('jakarta-jmeter-2.3.4', '/tmp/') 29 | run('cd jakarta-jmeter-2.3.4/bin; chmod a+x jmeter') 30 | #run('ls /tmp/') 31 | 32 | 33 | @task 34 | def start_jmeter(): 35 | # run jmeter in all test clients 36 | #with cd('/tmp/'): 37 | with cd('/tmp/jakarta-jmeter-2.3.4/bin/'): 38 | run('screen -d -m ./jmeter -n -t my-order.jmx -l log.jtl &>abc.log') 39 | #run('./jmeter -n -t unified-order.jmx -l log.jtl &>abc.log') 40 | #run('screen -d -m sleep 10', pty=False) 41 | #run('service tomcat start', pty=False) 42 | 43 | 44 | @task 45 | def kill_jmeter(): 46 | # kill the jmeter processes for unified order project 47 | with cd('/tmp/'): 48 | pids = run("ps -ef | grep unified | grep -v 'grep' | awk '{print $2'}") 49 | pid_list = pids.split('\r\n') 50 | for i in pid_list: 51 | run('kill -9 %s' % i) 52 | 53 | 54 | @task 55 | def get_status(): 56 | # get jmeter(java) running status 57 | with cd('/tmp'): 58 | run('ps -ef | grep unified | grep java | grep -v grep') 59 | -------------------------------------------------------------------------------- /py2022/ssh_cmd.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # *_* coding=utf-8 *_* 3 | 4 | import paramiko 5 | 6 | hostname_list = ['192.168.1.2', '192.162.1.3'] 7 | username = 'root' 8 | password = 'yourpassword' 9 | username = 'admin' 10 | password = '' 11 | 
port = 22 12 | 13 | 14 | hostname_list = [] 15 | 16 | def get_hosts(h_file): 17 | with open(h_file) as f: 18 | for l in f.readlines(): 19 | hostname_list.append(l.strip()) 20 | 21 | 22 | def exec_cmd(cmd): 23 | ''' exec a cmd on a remote linux system ''' 24 | for h in hostname_list: 25 | try: 26 | client = paramiko.SSHClient() 27 | client.load_system_host_keys() 28 | client.set_missing_host_key_policy(paramiko.MissingHostKeyPolicy) 29 | print('connecting: %s' % h) 30 | client.connect(h, port=port, username=username, password=password, 31 | timeout=5) 32 | # 多次执行命令 可复用同一个Channel 33 | chan = client.get_transport().open_session() 34 | print('exec cmd: %s' % cmd) 35 | chan.exec_command(cmd) 36 | print('exit code: %d' % chan.recv_exit_status()) 37 | if chan.recv_exit_status() == 0: 38 | print('%s OK' % h) 39 | else: 40 | print('%s Error!' % h) 41 | print(chan.recv(200).strip()) 42 | # stdin, stdout, stderr = client.exec_command(cmd) 43 | # print(stdout.read().strip()) 44 | # print(stderr.read().strip()) 45 | except Exception as e: 46 | print(e) 47 | finally: 48 | chan.close() 49 | client.close() 50 | 51 | 52 | if __name__ == '__main__': 53 | host_file = 'temp_test_ips' 54 | get_hosts(host_file) 55 | cmd = 'uptime' 56 | cmd = 'echo $(date)>>/tmp/a; sleep 1; uptime; exit 1' 57 | exec_cmd(cmd) 58 | -------------------------------------------------------------------------------- /leetcode/medium/length_of_longest_substring.py: -------------------------------------------------------------------------------- 1 | # -*- coding:UTF-8 -*- 2 | ''' 3 | 给定一个字符串 s ,请你找出其中不含有重复字符的 最长子串 的长度。 4 | 5 | 示例 1: 6 | 输入: s = "abcabcbb" 7 | 输出: 3 8 | 解释: 因为无重复字符的最长子串是 "abc",所以其长度为 3。 9 | 10 | 示例 2: 11 | 输入: s = "bbbbb" 12 | 输出: 1 13 | 解释: 因为无重复字符的最长子串是 "b",所以其长度为 1。 14 | 15 | s 由英文字母、数字、符号和空格组成 16 | 17 | 18 | 思路: 19 | 我们使用两个指针表示字符串中的某个子串(或窗口)的左右边界,其中左指针代表着上文中「枚举子串的起始位置」,而右指针即为上文中的 rk ; 20 | 在每一步的操作中,我们会将左指针向右移动一格,表示 我们开始枚举下一个字符作为起始位置,然后我们可以不断地向右移动右指针,但需要保证这两个指针对应的子串中没有重复的字符。在移动结束后,这个子串就对应着 
以左指针开始的,不包含重复字符的最长子串。我们记录下这个子串的长度; 21 | 在枚举结束后,我们找到的最长的子串的长度即为答案。 22 | 23 | 判断 是否有重复的字符,常用的数据结构为哈希集合 python中用set() 24 | 在左指针向右移动的时候,我们从哈希集合中移除一个字符,在右指针向右移动的时候,我们往哈希集合中添加一个字符。 25 | 26 | 27 | 作者:LeetCode-Solution 28 | 链接:https://leetcode-cn.com/problems/longest-substring-without-repeating-characters/solution/wu-zhong-fu-zi-fu-de-zui-chang-zi-chuan-by-leetc-2/ 29 | 来源:力扣(LeetCode) 30 | 著作权归作者所有。商业转载请联系作者获得授权,非商业转载请注明出处。 31 | 32 | 来源:力扣(LeetCode) 33 | 链接:https://leetcode-cn.com/problems/longest-substring-without-repeating-characters/solution/wu-zhong-fu-zi-fu-de-zui-chang-zi-chuan-by-leetc-2/ 34 | ''' 35 | 36 | 37 | class Solution: 38 | def lengthOfLongestSubstring(self, s): 39 | # 哈希集合,记录每个字符是否出现过 40 | occ = set() 41 | n = len(s) 42 | # 右指针,初始值为 -1,相当于我们在字符串的左边界的左侧,还没有开始移动 43 | rk, ans = -1, 0 44 | for i in range(n): 45 | if i != 0: 46 | # 左指针向右移动一格,移除一个字符 47 | occ.remove(s[i - 1]) 48 | while rk + 1 < n and s[rk + 1] not in occ: 49 | # 不断地移动右指针 50 | occ.add(s[rk + 1]) 51 | rk += 1 52 | # 第 i 到 rk 个字符是一个极长的无重复字符子串 53 | ans = max(ans, rk - i + 1) 54 | return ans 55 | 56 | 57 | if __name__ == '__main__': 58 | s1 = 'abcabcbb' 59 | s2 = 'bbbbbb' 60 | s = Solution() 61 | print(s.lengthOfLongestSubstring(s1)) 62 | print(s.lengthOfLongestSubstring(s2)) 63 | -------------------------------------------------------------------------------- /py2011/get_rpms.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | ''' 3 | Created on Dec 5, 2011 4 | 5 | @author: Jay Ren 6 | @note: to get and parse html from a url, then, to download all the RPMs listed in the html context 7 | ''' 8 | 9 | import http.client 10 | from html.parser import HTMLParser 11 | import re 12 | import os 13 | 14 | 15 | def get_rpm_list(host, url, rpm_list): 16 | conn = http.client.HTTPConnection(host) 17 | conn.request("GET", url) 18 | res = conn.getresponse() 19 | data = res.read() 20 | str = data.decode("utf-8") 21 | parser = my_html_parser(rpm_list) 22 | 
parser.feed(str) 23 | 24 | 25 | class my_html_parser(HTMLParser): 26 | 27 | def __init__(self, rpm_list): 28 | HTMLParser.__init__(self) 29 | rpm_list = rpm_list 30 | 31 | def handle_starttag(self, tag, attrs): 32 | if tag == 'a': 33 | for name, value in attrs: 34 | if name == "href": 35 | if re.search('\.rpm$', value): 36 | rpm_list.append(value) 37 | 38 | 39 | def download_rpms(rpm_list): 40 | path = base_dir + dir + '/' 41 | for rpm in rpm_list: 42 | rpm_url = url+rpm 43 | local_name = path + rpm 44 | if os.path.exists(local_name): 45 | os.remove(local_name) 46 | f = open(local_name, 'wb') 47 | conn = http.client.HTTPConnection(host) 48 | conn.request("GET", rpm_url) 49 | res = conn.getresponse() 50 | f.write(res.read()) 51 | 52 | 53 | def prepare_dir(base_dir, dir): 54 | path = base_dir + dir 55 | if not os.path.exists(path): 56 | os.makedirs(path) 57 | 58 | if __name__ == '__main__': 59 | host = 'XXX.XXX.com' 60 | url = "/pub/ISO/redhat/redhat-rhel6/RHEL-6.2-GA/Server/optional/x86_64/os/Packages/" 61 | rpm_list = [] 62 | base_dir = '/home/master/Downloads/' 63 | dir = 'temp_packages' 64 | get_rpm_list(host, url, rpm_list) 65 | prepare_dir(base_dir, dir) 66 | download_rpms(rpm_list) 67 | -------------------------------------------------------------------------------- /py-libvirt/libvirt-dominfo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # Get domain info via libvirt python API. 3 | # Tested with python2.6 and libvirt-python-0.9.10 on KVM host. 
4 | 5 | import libvirt 6 | import sys 7 | 8 | def createConnection(): 9 | conn = libvirt.openReadOnly(None) 10 | if conn == None: 11 | print 'Failed to open connection to QEMU/KVM' 12 | sys.exit(1) 13 | else: 14 | print '-----Connection is created successfully-----' 15 | return conn 16 | 17 | def closeConnnection(conn): 18 | print '' 19 | try: 20 | conn.close() 21 | except: 22 | print 'Failed to close the connection' 23 | return 1 24 | 25 | print 'Connection is closed' 26 | 27 | def getDomInfoByName(conn, name): 28 | print '' 29 | print '----------- get domain info by name ----------"' 30 | try: 31 | myDom = conn.lookupByName(name) 32 | except: 33 | print 'Failed to find the domain with name "%s"' % name 34 | return 1 35 | 36 | print "Dom id: %d name: %s" % (myDom.ID(), myDom.name()) 37 | print "Dom state: %s" % myDom.state(0) 38 | print "Dom info: %s" % myDom.info() 39 | print "memory: %d MB" % (myDom.maxMemory()/1024) 40 | print "memory status: %s" % myDom.memoryStats() 41 | print "vCPUs: %d" % myDom.maxVcpus() 42 | 43 | def getDomInfoByID(conn, id): 44 | print '' 45 | print '----------- get domain info by ID ----------"' 46 | try: 47 | myDom = conn.lookupByID(id) 48 | except: 49 | print 'Failed to find the domain with ID "%d"' % id 50 | return 1 51 | 52 | print "Domain id is %d ; Name is %s" % (myDom.ID(), myDom.name()) 53 | 54 | if __name__ == '__main__': 55 | name1 = "kvm-guest" 56 | name2 = "notExist" 57 | id1 = 3 58 | id2 = 9999 59 | print "---Get domain info via libvirt python API---" 60 | conn = createConnection() 61 | getDomInfoByName(conn, name1) 62 | getDomInfoByName(conn, name2) 63 | getDomInfoByID(conn, id1) 64 | getDomInfoByID(conn, id2) 65 | closeConnnection(conn) 66 | -------------------------------------------------------------------------------- /py2011/get_stock_price.py: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/python3 2 | # coding=utf-8 3 | 4 | ''' 5 | Created on Nov 9, 2011 6 | 7 | @author: Jay Ren 8 | @note: get some stock prices. need to know how to deal with some numbers and print formate. 9 | ''' 10 | 11 | import urllib.request 12 | 13 | def get_price(code): 14 | url = 'http://hq.sinajs.cn/?list=%s' % code 15 | req = urllib.request.Request(url) 16 | req.set_proxy('proxy01.XXXX.com:8080', 'http') 17 | content = urllib.request.urlopen(req).read() 18 | #print(content) 19 | str = content.decode('gbk') 20 | #print(content) 21 | #print(str) 22 | data = str.split('"')[1].split(',') 23 | name = "%-6s" % data[0] 24 | price_current = "%-6s" % float(data[3]) 25 | change_percent = ( float(data[3]) - float(data[2]) )*100 / float(data[2]) 26 | change_percent = "%s%%" % round(change_percent, 2) 27 | change_percent = "%-6s" % change_percent 28 | print("股票名称:{0} 涨跌幅:{1} 最新价:{2}".format(name, change_percent, price_current) ) 29 | 30 | def get_all_price(code_list): 31 | for code in code_list: 32 | get_price(code) 33 | 34 | def get_index(code): 35 | url = 'http://hq.sinajs.cn/?list=%s' % code 36 | req = urllib.request.Request(url) 37 | req.set_proxy('proxy01.XXXX.com:8080', 'http') 38 | content = urllib.request.urlopen(req).read() 39 | #print(content) 40 | str = content.decode('gbk') 41 | #print(content) 42 | #print(str) 43 | data = str.split('"')[1].split(',') 44 | name = "%-6s" % data[0] 45 | price_current = "%-6s" % float(data[3]) 46 | change_percent = ( float(data[3]) - float(data[2]) )*100 / float(data[2]) 47 | change_percent = "%-6s" % ("%s%%" % round(change_percent, 2)) 48 | print("指数名称:{0} 涨跌幅:{1} 最新指数:{2}".format(name, change_percent, price_current) ) 49 | 50 | def get_all_index(index_code_list): 51 | for code in index_code_list: 52 | get_index(code) 53 | 54 | code_list = ['sz300036', 'sz000977', 'sh600718', 'sh600489'] 55 | get_all_price(code_list) 56 | print("-----------------------------------------------------------------") 57 | index_code_list = ['sh000001', 
'sz399001'] 58 | get_all_index(index_code_list) 59 | -------------------------------------------------------------------------------- /py2015/range_xrange.py: -------------------------------------------------------------------------------- 1 | # _*_ coding: utf-8 _*_ 2 | ''' 3 | Created on Jul 14, 2015 4 | 5 | @author: Jay 6 | ''' 7 | 8 | 9 | def my_range(start, end=None, step=1): 10 | result = [] 11 | if not isinstance(start, int): 12 | return 'start argument must be an integer.' 13 | if (not isinstance(end, int)) and (not end is None): 14 | return 'end argument must be an integer.' 15 | if not isinstance(step, int): 16 | return 'step argument must be an integer.' 17 | elif step == 0: 18 | return 'step argument must not be zero.' 19 | if isinstance(end, int): 20 | while True: 21 | if start < end: 22 | result.append(start) 23 | start += step 24 | else: 25 | break 26 | else: # end is None 27 | start, end = 0, start 28 | while True: 29 | if start < end: 30 | result.append(start) 31 | start += step 32 | else: 33 | break 34 | return result 35 | 36 | 37 | # 跟range函数的实现基本一样,只是使用yield关键字表示生成器 38 | def my_xrange(start, end=None, step=1): 39 | if not isinstance(start, int): 40 | pass 41 | if (not isinstance(end, int)) and (not end is None): 42 | pass 43 | if not isinstance(step, int): 44 | pass 45 | elif step == 0: 46 | pass 47 | if isinstance(end, int): 48 | while True: 49 | if start < end: 50 | yield start 51 | start += step 52 | else: 53 | break 54 | else: # end is None 55 | start, end = 0, start 56 | while True: 57 | if start < end: 58 | yield start 59 | start += step 60 | else: 61 | break 62 | 63 | 64 | if __name__ == '__main__': 65 | print my_range(8) 66 | print my_range(8, 1, 1) 67 | print my_range(8, 1.5, 1) 68 | print my_range(1, 9) 69 | print my_range(1, 9, 0) 70 | print [i for i in my_xrange(8)] 71 | print [i for i in my_xrange(8, 1, 1)] 72 | print [i for i in my_xrange(8, 1.5, 1)] 73 | print [i for i in my_xrange(1, 9)] 74 | 
-------------------------------------------------------------------------------- /leetcode/easy/reverse_list.py: -------------------------------------------------------------------------------- 1 | # *_* coding=utf-8 *_* 2 | 3 | ''' 4 | 反转链表: 5 | https://leetcode-cn.com/problems/reverse-linked-list/ 6 | 7 | 实现了一个链表;并且用 迭代/递归 两种方法进行反转。 8 | 9 | 1. 迭代:时间复杂度 O(N) 空间复杂度 O(1) 10 | 2. 递归:时间复杂度 O(N) 空间复杂度 O(N) 11 | 12 | ''' 13 | 14 | 15 | # Definition for singly-linked list. 16 | class ListNode(object): 17 | def __init__(self, val=0, next=None): 18 | self.val = val 19 | self.next = next 20 | 21 | 22 | class LinkedList(object): 23 | # 通过一个list 初始化一个链表 24 | def __init__(self, l=[1, 2, 3, 4, 5, 6]): 25 | self.head = ListNode(l[0]) 26 | cur = self.head 27 | for i in l[1:]: 28 | cur.next = ListNode(i) 29 | cur = cur.next 30 | 31 | def print_linked_list(self): 32 | # 为了保持链表 head 不被破坏 33 | temp_head = self.head 34 | while temp_head: 35 | print(temp_head.val) 36 | temp_head = temp_head.next 37 | 38 | 39 | class Solution(object): 40 | # 遍历方式反转链表 41 | def reverse_list(self, head): 42 | """ 43 | :type head: ListNode 44 | :rtype: ListNode 45 | """ 46 | cur, pre = head, None 47 | while cur: 48 | # pre, pre.next, cur = cur, pre, cur.next 49 | temp = cur.next 50 | cur.next = pre 51 | pre = cur 52 | cur = temp 53 | return pre 54 | 55 | # 递归方式反转链表 56 | def reverse_list_recursion(self, head): 57 | """ 58 | :type head: ListNode 59 | :rtype: ListNode 60 | """ 61 | if (not head) or (not head.next): 62 | return head 63 | node = self.reverse_list_recursion(head.next) 64 | head.next.next = head 65 | head.next = None 66 | return node 67 | 68 | 69 | if __name__ == '__main__': 70 | ll = LinkedList(l=[1, 2, 3, 4, 5]) 71 | print('------------------------------') 72 | print('before reverse') 73 | ll.print_linked_list() 74 | h = ll.head 75 | s = Solution() 76 | ll.head = s.reverse_list(h) 77 | print('------------------------------') 78 | print('after reverse: reverse_list()') 79 | ll.print_linked_list() 
80 | print('------------------------------') 81 | ll.head = s.reverse_list_recursion(ll.head) 82 | print('after reverse: reverse_list_recursion()') 83 | ll.print_linked_list() 84 | 85 | -------------------------------------------------------------------------------- /py2013/python3-mysql.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # a sample to use mysql-connector for python3 3 | # see details from http://dev.mysql.com/doc/connector-python/en/index.html 4 | 5 | import mysql.connector 6 | import sys 7 | import os 8 | 9 | user = 'root' 10 | pwd = '123456' 11 | host = '127.0.0.1' 12 | db = 'test' 13 | 14 | data_file = 'mysql-test.dat' 15 | 16 | create_table_sql = "CREATE TABLE IF NOT EXISTS mytable ( \ 17 | id int(10) AUTO_INCREMENT PRIMARY KEY, \ 18 | name varchar(20), age int(4) ) \ 19 | CHARACTER SET utf8" 20 | 21 | insert_sql = "INSERT INTO mytable(name, age) VALUES ('Jay', 22 ), ('杰', 26)" 22 | select_sql = "SELECT id, name, age FROM mytable" 23 | 24 | cnx = mysql.connector.connect(user=user, password=pwd, host=host, database=db) 25 | cursor = cnx.cursor() 26 | 27 | try: 28 | cursor.execute(create_table_sql) 29 | except mysql.connector.Error as err: 30 | print("create table 'mytable' failed.") 31 | print("Error: {}".format(err.msg)) 32 | sys.exit() 33 | 34 | try: 35 | cursor.execute(insert_sql) 36 | except mysql.connector.Error as err: 37 | print("insert table 'mytable' failed.") 38 | print("Error: {}".format(err.msg)) 39 | sys.exit() 40 | 41 | if os.path.exists(data_file): 42 | myfile = open(data_file) 43 | lines = myfile.readlines() 44 | myfile.close() 45 | 46 | for line in lines: 47 | myset = line.split() 48 | sql = "INSERT INTO mytable (name, age) VALUES ('{}', {})".format( 49 | myset 50 | [0], 51 | myset 52 | [1]) 53 | try: 54 | cursor.execute(sql) 55 | except mysql.connector.Error as err: 56 | print("insert table 'mytable' from file 'mysql-test.dat' - failed.") 57 | print("Error: 
{}".format(err.msg)) 58 | sys.exit() 59 | 60 | try: 61 | cursor.execute(select_sql) 62 | for (id, name, age) in cursor: 63 | print("ID:{} Name:{} Age:{}".format(id, name, age)) 64 | except mysql.connector.Error as err: 65 | print("query table 'mytable' failed.") 66 | print("Error: {}".format(err.msg)) 67 | sys.exit() 68 | 69 | cnx.commit() 70 | cursor.close() 71 | cnx.close() 72 | -------------------------------------------------------------------------------- /leetcode/easy/remove_duplicates.py: -------------------------------------------------------------------------------- 1 | # -*- coding:UTF-8 -*- 2 | ''' 3 | 给出由小写字母组成的字符串 S,重复项删除操作会选择两个相邻且相同的字母,并删除它们。 4 | 在 S 上反复执行重复项删除操作,直到无法继续删除。 5 | 在完成所有重复项删除操作后返回最终的字符串。答案保证唯一。 6 | 7 | 示例: 8 | 9 | 输入:"abbaca" 10 | 输出:"ca" 11 | 解释: 12 | 例如,在 "abbaca" 中,我们可以删除 "bb" 由于两字母相邻且相同,这是此时唯一可以执行删除操作的重复项。之后我们得到字符串 "aaca",其中又只有 "aa" 可以执行重复项删除操作,所以最后的字符串为 "ca"。 13 | 14 | 提示: 15 | * 1 <= S.length <= 20000 16 | * S 仅由小写英文字母组成。 17 | 18 | 来源:力扣(LeetCode) 19 | 链接:https://leetcode-cn.com/problems/remove-all-adjacent-duplicates-in-string 20 | ''' 21 | 22 | class Solution(object): 23 | def removeDuplicates(self, S): 24 | """ 25 | :type S: str 26 | :rtype: str 27 | 这是通过递归的方法来做,时间复杂度很高 O(n^2) 28 | """ 29 | length = len(S) 30 | for i in range(length-1): 31 | if S[i] == S[i+1]: 32 | S1 = S.replace('%s%s' % (S[i], S[i+1]), '') 33 | # print(S1) 34 | return self.removeDuplicates(S1) 35 | return S 36 | 37 | 38 | def removeDuplicates_1(self, S): 39 | """ 40 | :type S: str 41 | :rtype: str 42 | 使用 栈,代码非常简洁,时间复杂度 O(n) ,空间复杂度O(n) 43 | 消除一对相邻重复项可能会导致新的相邻重复项出现,如从字符串 abba中删除 bb 会导致出现新的相邻重复项 aa 出现。 44 | 因此我们需要保存当前还未被删除的字符。一种显而易见的数据结构呼之欲出:栈。 45 | 我们只需要遍历该字符串,如果当前字符和栈顶字符相同,我们就贪心地将其消去,否则就将其入栈即可。 46 | """ 47 | stack = [''] 48 | for i in S: 49 | if i == stack[-1]: 50 | stack.pop(-1) 51 | else: 52 | stack.append(i) 53 | return ''.join(stack) 54 | 55 | 56 | def removeDuplicates_2(self, S): 57 | """ 58 | :type S: str 59 | :rtype: str 60 | 使用 双指针 , 时间复杂度 O(n),空间复杂度 
O(n) 61 | 挨着两个相同的同时消失,可以使用两个指针。 62 | * 一个right一直往右移动,然后把指向的值递给left指向的值即可。 63 | * 一个left每次都会比较挨着的两个是否相同,如果相同,他两同时消失 64 | """ 65 | left = 0 66 | right = 0 67 | length = len(S) 68 | l1 = list(S) 69 | while (right < length): 70 | l1[left] = l1[right] 71 | if (left > 0) and l1[left - 1] == l1[left]: 72 | left -= 2 73 | left += 1 74 | right += 1 75 | return ''.join(l1[:left]) 76 | 77 | 78 | if __name__ == '__main__': 79 | s = Solution() 80 | str1 = "aababaab" 81 | print(s.removeDuplicates(str1)) 82 | print(s.removeDuplicates_1(str1)) 83 | print(s.removeDuplicates_2(str1)) 84 | str1 = "abbaca" 85 | print(s.removeDuplicates(str1)) 86 | print(s.removeDuplicates_1(str1)) 87 | print(s.removeDuplicates_2(str1)) 88 | -------------------------------------------------------------------------------- /py2018/set_check_localtime.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 2 | 3 | import sys 4 | import time 5 | import subprocess 6 | import argparse 7 | import urllib2 8 | 9 | 10 | def set_beijing_time_from_web(url): 11 | ''' set os and hardware clock as beijing time from internet ''' 12 | # use urllib2 in python2; not use requests which need installation 13 | response = urllib2.urlopen(url) 14 | #print response.read() 15 | # 获取http头date部分 16 | ts = response.headers['date'] 17 | # 将日期时间字符转化为time 18 | gmt_time = time.strptime(ts[5:25], "%d %b %Y %H:%M:%S") 19 | # 将GMT时间转换成北京时间 20 | local_time = time.localtime(time.mktime(gmt_time) + 8*3600) 21 | str1 = "%u-%02u-%02u" % (local_time.tm_year, 22 | local_time.tm_mon, local_time.tm_mday) 23 | str2 = "%02u:%02u:%02u" % ( 24 | local_time.tm_hour, local_time.tm_min, local_time.tm_sec) 25 | cmd = 'date -s "%s %s"' % (str1, str2) 26 | #print cmd 27 | subprocess.check_call(cmd, shell=True) 28 | hw_cmd = 'hwclock -w' 29 | #print hw_cmd 30 | subprocess.check_call(hw_cmd, shell=True) 31 | print 'OK. 
set time: %s' % ' '.join([str1, str2]) 32 | 33 | 34 | def check_localtime_with_internet(url): 35 | ''' check local time with internet ''' 36 | threshold = 2 37 | # use urllib2 in python2; not use requests which need installation 38 | response = urllib2.urlopen(url) 39 | #print response.read() 40 | # 获取http头date部分 41 | ts = response.headers['date'] 42 | # 将日期时间字符转化为time 43 | gmt_time = time.strptime(ts[5:25], "%d %b %Y %H:%M:%S") 44 | # 将GMT时间转换成北京时间 45 | internet_ts = time.mktime(gmt_time) 46 | local_ts = time.mktime(time.gmtime()) 47 | if abs(local_ts - internet_ts) <= threshold: 48 | print 'OK. check localtime.' 49 | else: 50 | print 'ERROR! local_ts: %s internet_ts:%s' % (local_ts, internet_ts) 51 | sys.exit(1) 52 | 53 | 54 | if __name__ == '__main__': 55 | url = 'http://www.baidu.com' 56 | parser = argparse.ArgumentParser() 57 | parser.description = 'set/check localtime (i.e. CST) with internet' 58 | parser.add_argument('-c', '--check', action='store_true', 59 | help='only check local time') 60 | parser.add_argument('-s', '--set', action='store_true', 61 | help='only set local time') 62 | parser.add_argument('-u', '--url', default=url, 63 | help='the url to sync time') 64 | args = parser.parse_args() 65 | if args.set: 66 | set_beijing_time_from_web(args.url) 67 | else: 68 | check_localtime_with_internet(args.url) 69 | -------------------------------------------------------------------------------- /leetcode/easy/word_pattern.py: -------------------------------------------------------------------------------- 1 | # *_* coding=utf-8 *_* 2 | 3 | ''' 4 | 给定一种规律 pattern 和一个字符串 str ,判断 str 是否遵循相同的规律。 5 | 这里的 遵循 指完全匹配,例如, pattern 里的每个字母和字符串 str 中的每个非空单词之间存在着双向连接的对应规律。 6 | 7 | 说明: 你可以假设 pattern 只包含小写字母, str 包含了由单个空格分隔的小写字母。 8 | 9 | 示例1: 10 | 输入: pattern = "abba", str = "dog cat cat dog" 11 | 输出: true 12 | 13 | 示例 2: 14 | 输入:pattern = "abba", str = "dog cat cat fish" 15 | 输出: false 16 | 17 | 链接:https://leetcode-cn.com/problems/word-pattern 18 | 19 | 
以第2中为例:时间复杂度O(n+m),空间复杂度O(n+m) 20 | ''' 21 | 22 | class Solution(object): 23 | def wordPattern(self, pattern, s): 24 | """ 25 | :type pattern: str 26 | :type s: str 27 | :rtype: bool 28 | """ 29 | s_list = s.strip().split() 30 | my_map = dict() 31 | if len(pattern) != len(s_list): 32 | return False 33 | for i, j in zip(pattern, s_list): 34 | if i in my_map: 35 | if my_map[i] != j: 36 | return False 37 | else: 38 | if j in my_map.values(): 39 | return False 40 | else: 41 | my_map[i] = j 42 | return True 43 | 44 | def wordPattern_1(self, pattern, s): 45 | """ 46 | :type pattern: str 47 | :type s: str 48 | :rtype: bool 49 | """ 50 | s_list = s.strip().split() 51 | ch_map = dict() 52 | word_map = dict() 53 | if len(pattern) != len(s_list): 54 | return False 55 | for i, j in zip(pattern, s_list): 56 | if (i in ch_map and ch_map[i] != j) or (j in word_map and word_map[j] != i): 57 | return False 58 | else: 59 | ch_map[i] = j 60 | word_map[j] = i 61 | return True 62 | 63 | def wordPattern_2(self, pattern, s): 64 | """ 65 | :type pattern: str 66 | :type s: str 67 | :rtype: bool 68 | """ 69 | res=s.split() 70 | return list(map(pattern.index, pattern))==list(map(res.index,res)) 71 | 72 | 73 | if __name__ == '__main__': 74 | s = Solution() 75 | p1 = 'abba' 76 | s1 = 'dog cat cat dog' 77 | print(s.wordPattern(p1, s1)) 78 | print(s.wordPattern_1(p1, s1)) 79 | print(s.wordPattern_2(p1, s1)) 80 | p2 = 'abba' 81 | s2 = 'dog cat cat fish' 82 | print(s.wordPattern(p2, s2)) 83 | print(s.wordPattern_1(p2, s2)) 84 | print(s.wordPattern_2(p2, s2)) 85 | p3 = 'abba' 86 | s3 = 'dog dog dog dog' 87 | print(s.wordPattern(p3, s3)) 88 | print(s.wordPattern_1(p3, s3)) 89 | print(s.wordPattern_2(p3, s3)) 90 | -------------------------------------------------------------------------------- /py2016/cleanup_pid.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | import subprocess 5 | 6 | base_dir = '/tmp/pid_dir' 7 | 
#!/usr/bin/env python

import os
import subprocess

base_dir = '/tmp/pid_dir'
pid_files = ['ut.pid', 'ft.pid']
max_seconds = 48 * 3600  # kill processes that ran longer than 48 hours


def check_pid(pid):
    """Check for the existence of a unix pid.

    Signal 0 performs error checking only: os.kill raises OSError when
    the process does not exist (or we lack permission to signal it).
    """
    try:
        os.kill(pid, 0)
    except OSError:
        return False
    else:
        return True


def get_elapsed_time(pid):
    """Return the elapsed run time of process `pid` in seconds.

    Parses the etime column of ps(1) output, which is formatted as
    [[dd-]hh:]mm:ss.  Returns 0 when ps does not report the pid.
    """
    cmd = 'ps -p %s -o pid,etime' % str(pid)
    proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
    proc.wait()
    results = proc.stdout.readlines()
    # Single parsing pass.  The original code parsed the output twice
    # (a for-loop plus a redundant list comprehension) and called
    # result.strip() while discarding its return value.
    pidInfo = None
    for result in results:
        if isinstance(result, bytes):
            # subprocess pipes yield bytes on Python 3
            result = result.decode()
        fields = result.split()
        # match on the pid column; skips the header and short lines
        if len(fields) >= 2 and fields[0] == str(pid):
            pidInfo = fields[1]
            break
    if pidInfo is None:
        # didn't find one
        print("Process PID %s doesn't seem to exist!" % pid)
        return 0
    days_part, dash, rest = pidInfo.partition('-')
    if dash == '-':
        # etime carries a day count: dd-hh:mm:ss
        days = int(days_part)
        hours, minutes, seconds = [int(x) for x in rest.split(':')]
    else:
        days = 0
        parts = [int(x) for x in pidInfo.split(':')]
        if len(parts) == 3:
            hours, minutes, seconds = parts
        elif len(parts) == 2:
            hours = 0
            minutes, seconds = parts
        else:
            hours = 0
            minutes = 0
            seconds = parts[0]

    return days * 24 * 3600 + hours * 3600 + minutes * 60 + seconds


def remove_pid(pidfiles):
    """Remove pid files whose process is gone; kill-and-remove processes
    that have been running longer than max_seconds."""
    for name in pidfiles:
        filepath = '%s/%s' % (base_dir, name)
        if not os.path.exists(filepath):
            continue
        del_flag = 0
        with open(filepath) as f:
            pid = f.read()
        if not check_pid(int(pid)):
            print('pid file: %s' % name)
            print('process does not exist with pid %s' % pid)
            del_flag = 1
        elif get_elapsed_time(pid) > max_seconds:
            print('elapsed_time is greater than max_seconds')
            print('trying to kill pid %s' % pid)
            os.kill(int(pid), 9)
            del_flag = 1
        if del_flag:
            os.unlink(filepath)


if __name__ == '__main__':
    remove_pid(pid_files)
class repos(object):

    """download linux repos from mirrors' site."""

    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh) Gecko/20100101 Firefox/42.0'}
    # shared crawl state: path -> {'parent', 'sub_dirs', 'files'}
    urls_dict = {}

    def __init__(self, base_url, base_dir):
        super(repos, self).__init__()
        self.base_url = base_url
        self.base_dir = base_dir

    def download(self):
        """Fetch every discovered file, mirroring the directory layout
        under base_dir."""
        for dirpath in self.urls_dict:
            for fname in self.urls_dict[dirpath]['files']:
                full_url = self.base_url + dirpath + fname
                print(full_url)
                resp = requests.get(full_url, headers=self.headers)
                if not resp.ok:
                    continue
                target = self.base_dir + dirpath + fname
                print(target)
                if not os.path.exists(self.base_dir + dirpath):
                    os.makedirs(self.base_dir + dirpath)
                with open(target, "wb") as the_file:
                    the_file.write(resp.content)

    def get_urls_dict(self, path='/', parent=None):
        """Walk the remote directory listing depth-first, recording each
        path's sub-directories and files into urls_dict."""
        if path not in self.urls_dict:
            self.urls_dict[path] = {
                'parent': parent, 'sub_dirs': [], 'files': []}
        resp = requests.get(self.base_url + path, headers=self.headers)
        if resp.ok:
            soup = BeautifulSoup(resp.text, 'html.parser')
            for anchor in soup.find_all('a'):
                href = unquote(anchor.get('href'))
                if href.endswith('/') and href != '/' and href != '../':
                    self.urls_dict[path]['sub_dirs'].append(href)
                elif not href.endswith('/') and not href.startswith('?'):
                    self.urls_dict[path]['files'].append(href)
        entry = self.urls_dict[path]
        if entry['parent'] is None and len(entry['sub_dirs']) == 0:
            # back at the root with nothing left to visit: traversal done
            pass
        elif len(entry['sub_dirs']) != 0:
            # descend into the first remaining sub-directory
            for sub in entry['sub_dirs']:
                return self.get_urls_dict(path=path + sub, parent=path)
        elif entry['parent'] is not None and len(entry['sub_dirs']) == 0:
            # leaf fully scanned: unlink it from its parent, climb back up
            self.urls_dict[entry['parent']]['sub_dirs'].remove(
                path.split('/')[-2] + '/')
            return self.get_urls_dict(
                path=entry['parent'],
                parent=self.urls_dict[entry['parent']]['parent'])


if __name__ == '__main__':
    url = 'http://mirrors.163.com/centos/6.7/os/x86_64'
    the_dir = '/tmp/centos6u7'
    repo = repos(url, the_dir)
    repo.get_urls_dict()
    repo.download()
class app_poi(object):
    '''
    Drive the DianPing App in an Android device or emulator.
    Assumes the DianPing App and the TestApp are already installed
    in the android system.
    '''

    upload_url = "http://qa-show-web02.nh/upload.php"
    adb = '/Users/jay/workspace/adt-bundle-mac-x86_64-20130917/sdk/platform-tools/adb'
    app_img_dir = '/mnt/sdcard/Robotium-Screenshots'

    def __init__(self):
        pass

    def clear_storage(self):
        '''Delete previously captured Robotium screenshots on the device.'''
        os.system('%s shell rm -f %s/*_*.jpg' % (self.adb, self.app_img_dir))

    def gen_img(self, poi_info=None, img_dir='img'):
        '''Take an APP screenshot at the poi's geo location.

        poi_info must be a list as
        [shopid, shopname, longitude, latitude, cityid, cityname],
        e.g. [9125655, '龙马川菜馆', 119.940181, 31.969895, 93, '常州'].
        Returns the absolute path of the pulled image file.
        '''
        cwd = os.getcwd()
        if not os.path.exists(img_dir):
            os.makedirs(img_dir)
        lng = poi_info[2]
        lat = poi_info[3]
        cityid = poi_info[4]
        # image name is $cityid_$shopid.jpg; '.jpg' is appended
        # automatically by Robotium's takeScreenshot().
        imgname = '%s_%s' % (cityid, poi_info[0])

        # assumes there is only one android device/emulator attached
        run_test_cmd = '%s shell am instrument -e lng %s -e lat %s -e cityid %s -e imgname %s ' % (self.adb, lng, lat, cityid, imgname)\
            + '-w com.dianping.v1.test/com.dianping.v1.test.MyTestRunner'
        os.system(run_test_cmd)
        time.sleep(20)
        os.chdir(img_dir)
        os.system('%s pull %s/%s.jpg %s.jpg' % (self.adb, self.app_img_dir, imgname, imgname))
        time.sleep(2)
        os.chdir(cwd)  # restore the previous working directory

        # return the absolute path of the image file
        return '%s/%s/%s.jpg' % (cwd, img_dir, imgname)

    def upload_img(self, mypic):
        '''POST the image file to the remote upload server via pycurl.'''
        buf = StringIO()
        c = pycurl.Curl()
        form = [
            ("upload_file[]", (pycurl.FORM_FILE, str(mypic)))
        ]
        c.setopt(c.URL, self.upload_url)
        c.setopt(c.WRITEFUNCTION, buf.write)
        c.setopt(c.HTTPPOST, form)
        c.perform()
        c.close()
        print(buf.getvalue())


if __name__ == '__main__':
    myapp = app_poi()
    myapp.clear_storage()
    poi = poi()
    poi_info_list = poi.xml_to_poi_info_list()
    for i in poi_info_list:
        mypic = myapp.gen_img(poi_info=i)
        myapp.upload_img(mypic)
#!/usr/bin/python3

import re
import sys
from subprocess import Popen, PIPE, STDOUT


def shell_rc_and_output(cmd):
    '''Execute a shell command; return (returncode, combined stdout+stderr).'''
    p = Popen(cmd, shell=True, stdin=PIPE, stdout=PIPE, stderr=STDOUT)
    out = p.communicate()[0]
    rc = p.returncode
    if isinstance(out, bytes):
        # subprocess returns bytes; decode to str for the callers
        out = out.decode()
    return rc, str(out)


def parse_memory_line(line):
    """Parse one memory-map line and return the region's (start, end)
    addresses as ints, or None when the line carries no
    '[mem <start>-<end>]' range.

    The pattern is a raw string: '\\[' must reach the regex engine
    unmangled (a plain string would trigger an invalid-escape warning).
    """
    pattern = r'.+\[mem (.+)-(.+)\].+'
    match = re.search(pattern, line)
    if match:
        start = int(match.group(1), 16)
        end = int(match.group(2), 16)
        return start, end
    return None


def calculate_reserved_memory(lines):
    """Sum the sizes (bytes) of all 'reserved' memory regions."""
    total_reserved_memory = 0
    for line in lines:
        if 'mem' in line and 'reserved' in line:
            parsed = parse_memory_line(line)
            if parsed is None:
                continue  # malformed line: skip instead of crashing on unpack
            start, end = parsed
            # the end address is inclusive, hence the +1
            total_reserved_memory += end - start + 1
    return total_reserved_memory


def calculate_memory(lines):
    """Sum the sizes (bytes) of all memory regions."""
    total_memory = 0
    for line in lines:
        if 'mem' in line:
            parsed = parse_memory_line(line)
            if parsed is None:
                continue  # malformed line: skip instead of crashing on unpack
            start, end = parsed
            # the end address is inclusive, hence the +1
            total_memory += end - start + 1
    return total_memory


def get_e820_mem_map():
    """Return the BIOS-e820 memory map lines from `dmesg -T` as a list,
    e.g. "BIOS-e820: [mem 0x0000000000000000-0x000000000009ffff] usable".

    Exits with status 1 when dmesg/grep fails (e.g. no permission).
    """
    cmd = "dmesg -T | grep -Eio ' (bios-e820:.*mem .*)'"
    rc, out = shell_rc_and_output(cmd)
    if rc != 0:
        print("Error! failed to run cmd: {}".format(cmd))
        sys.exit(1)
    return out.split('\n')


def print_reserved_ratio(total, reserved):
    """Print the reserved-to-total memory ratio as a percentage."""
    ratio = reserved / total
    percentage = ratio * 100
    print("memory reserved percentage: {:.2f}%".format(percentage))


if __name__ == '__main__':
    # BIOS-e820 memory map from the kernel log
    mem_map = get_e820_mem_map()
    total_memory = calculate_memory(mem_map)
    total_mem_mb = total_memory / 1024 / 1024
    print("Total memory: {:.0f} MB".format(total_mem_mb))
    reserved_memory = calculate_reserved_memory(mem_map)
    res_mem_mb = reserved_memory / 1024 / 1024
    print("Total reserved memory: {:.0f} MB".format(res_mem_mb))
    print_reserved_ratio(total_mem_mb, res_mem_mb)
import os
import syslog
import subprocess
import time
import glob

DOCUMENTATION = '''
---
module: rpmdb
short_description: Manages the rpm database.
description:
    - Check and rebuild the rpm database.
version_added: "2.0"
options:
  action:
    choices: [ "check", "rebuild" ]
    description:
      - The action name.
      - check: only check if rpm db is OK or not.
      - rebuild: rebuild rpm db if it is NOT OK.
    required: false
    default: check
  timeout:
    description:
      - The TIMEOUT seconds when checking rpm db.
    required: false
    default: 10
notes: []
requirements: [ rpm, rm ]
author: Jay
'''

EXAMPLES = '''
- rpmdb: action=check
- rpmdb: action=rebuild
'''

# ==============================================================


RPMBIN = '/bin/rpm'


def log(msg):
    """Send msg to syslog, tagged with this module's file name."""
    syslog.openlog('ansible-%s' % os.path.basename(__file__))
    syslog.syslog(syslog.LOG_NOTICE, msg)


def execute_command(module, cmd):
    """Log and run cmd (an argv list) through Ansible's run_command.

    Returns the (rc, stdout, stderr) tuple from run_command.
    """
    log('Command %s' % '|'.join(cmd))
    return module.run_command(cmd)


def check_db(module, timeout=10):
    """Run 'rpm -qa' and watch for errors, bad exit status, or a hang.

    Return codes:
      0 - OK
      1 - 'error:' found in the rpm output
      2 - rpm exited non-zero
      3 - still running after `timeout` seconds (child is killed)
    """
    rc = 0
    logfile = '/tmp/rpm-qa.log'
    elapsed_time = 0
    # '> file 2>&1' is POSIX sh.  The original '&>' is a bash-ism: under
    # /bin/sh (what shell=True uses) it backgrounds the command and
    # loses the stderr redirect entirely.
    cmd = '%s -qa > %s 2>&1' % (RPMBIN, logfile)
    child = subprocess.Popen(cmd, shell=True)

    while elapsed_time <= timeout:
        child_ret = child.poll()
        if child_ret is None:  # child still running
            time.sleep(1)
            elapsed_time += 1
        elif child_ret == 0:
            # cmd finished cleanly; rpm may still have reported db errors
            with open(logfile, 'r') as f:
                if 'error:' in f.read():
                    rc = 1
            break
        else:
            rc = 2
            break
    if elapsed_time > timeout:
        child.kill()
        time.sleep(1)
        rc = 3
    return rc


def rebuild_db(module):
    """Remove stale BDB environment files and rebuild the rpm database.

    The glob must be expanded here: run_command does not go through a
    shell, so a literal '/var/lib/rpm/__db.*' argument would never match
    (rm -f then silently removed nothing).
    """
    stale = glob.glob('/var/lib/rpm/__db.*')
    rc1 = 0
    if stale:
        rc1, out1, err1 = execute_command(module, ['rm', '-f'] + stale)
    cmd = [RPMBIN, '--rebuilddb']
    rc, out, err = execute_command(module, cmd)
    return (rc == 0) and (rc1 == 0)


# main
def main():
    """Entry point: parse module arguments and dispatch the action."""

    # defining module
    module = AnsibleModule(
        argument_spec=dict(
            action=dict(required=False, default='check', choices=['check', 'rebuild']),
            timeout=dict(required=False, default=10, type='int')
        )
    )

    changed = False
    msg = ''
    action = module.params['action']
    timeout = module.params['timeout']
    check_cmd = 'rpm -qa'

    if action == 'check':
        rc = check_db(module, timeout)
        if rc == 1:
            module.fail_json(msg='Error when running cmd: %s' % (check_cmd))
        elif rc == 2:
            module.fail_json(msg='return code error. cmd: %s' % (check_cmd))
        elif rc == 3:
            module.fail_json(msg='Timeout %d s. cmd: %s' % (timeout, check_cmd))
        elif rc == 0:
            msg = 'OK. cmd: %s' % check_cmd

    elif action == 'rebuild':
        rc = check_db(module, timeout)
        if rc != 0:
            if rebuild_db(module):
                changed = True
                msg = 'OK. rm -f /var/lib/rpm/__db.00*; rpm --rebuilddb'
            else:
                msg = 'Error. rm -f /var/lib/rpm/__db.00*; rpm --rebuilddb'
                module.fail_json(msg=msg)

    module.exit_json(
        changed=changed,
        action=action,
        msg=msg
    )
#!/usr/bin/python
# -*- coding: utf-8 -*-
# filename: paxel.py
# FROM: http://fayaa.com/code/view/58/full/
# Jay modified it a little and save for further potential usage.

'''It is a multi-thread downloading tool

It was developed following axel.
Author: volans
E-mail: volansw [at] gmail.com
'''

import sys
import os
import time
import urllib
from threading import Thread, Lock
import shutil
from contextlib import closing

printLocker = Lock()

# in case you want to use http_proxy
local_proxies = {'http': 'http://131.139.58.200:8080'}


class AxelPython(Thread, urllib.FancyURLopener):
    '''One download worker thread: fetches an assigned byte range of the
    URL into its own part file, resuming from what is already on disk.

    run() is a vitural method of Thread.
    '''

    def __init__(self, threadname, url, filename, ranges, proxies={}):
        Thread.__init__(self, name=threadname)
        urllib.FancyURLopener.__init__(self, proxies)
        self.name = threadname
        self.url = url
        self.filename = filename
        self.ranges = ranges
        self.downloaded = 0

    def run(self):
        '''Thread entry point: download self.ranges of self.url.'''
        try:
            self.downloaded = os.path.getsize(self.filename)
        except OSError:
            # nothing downloaded yet
            self.downloaded = 0

        # resume point within the assigned range
        self.startpoint = self.ranges[0] + self.downloaded

        if self.startpoint >= self.ranges[1]:
            # this part was finished in a previous run
            print('Part %s has been downloaded over.' % self.filename)
            return

        self.oneTimeSize = 16384  # 16kByte/time
        printLocker.acquire()
        print('task %s will download from %d to %d' % (self.name, self.startpoint, self.ranges[1]))
        printLocker.release()

        self.addheader("Range", "bytes=%d-%d" % (self.startpoint, self.ranges[1]))
        self.urlhandle = self.open(self.url)

        chunk = self.urlhandle.read(self.oneTimeSize)
        while chunk:
            part_file = open(self.filename, 'ab+')
            part_file.write(chunk)
            part_file.close()
            self.downloaded += len(chunk)
            chunk = self.urlhandle.read(self.oneTimeSize)


def GetUrlFileSize(url, proxies={}):
    '''Return the Content-Length of url, or 0 when the header is absent.'''
    with closing(urllib.urlopen(url, proxies=proxies)) as urlHandler:
        length = urlHandler.headers.getheader('Content-Length')
    if length is None:
        return 0
    return int(length)


def SpliteBlocks(totalsize, blocknumber):
    '''Split totalsize bytes into blocknumber contiguous (start, end)
    ranges; the last range absorbs the remainder.'''
    blocksize = totalsize / blocknumber
    ranges = [(i * blocksize, i * blocksize + blocksize - 1)
              for i in range(blocknumber - 1)]
    ranges.append((blocksize * (blocknumber - 1), totalsize - 1))
    return ranges


def islive(tasks):
    '''True while at least one worker thread is still running.'''
    for task in tasks:
        if task.isAlive():
            return True
    return False


def paxel(url, output, blocks=6, proxies=local_proxies):
    '''Download url into output using `blocks` parallel range requests.'''
    size = GetUrlFileSize(url, proxies)
    ranges = SpliteBlocks(size, blocks)

    threadname = ["thread_%d" % i for i in range(blocks)]
    filename = ["tmpfile_%d" % i for i in range(blocks)]

    tasks = []
    for i in range(blocks):
        worker = AxelPython(threadname[i], url, filename[i], ranges[i])
        worker.setDaemon(True)
        worker.start()
        tasks.append(worker)

    time.sleep(2)
    while islive(tasks):
        downloaded = sum(task.downloaded for task in tasks)
        process = downloaded / float(size) * 100
        show = u'\rFilesize:%d Downloaded:%d Completed:%.2f%%' % (size, downloaded, process)
        sys.stdout.write(show)
        sys.stdout.flush()
        time.sleep(0.5)
    sys.stdout.write(u'\rFilesize:{0} Downloaded:{0} Completed:100% \n'.format(size))
    sys.stdout.flush()

    # stitch the part files together, then clean them up
    with open(output, 'wb+') as filehandle:
        for part in filename:
            with open(part, 'rb') as f:
                shutil.copyfileobj(f, filehandle, 102400)
            try:
                os.remove(part)
            except OSError:
                pass


if __name__ == '__main__':
    url = 'http://dldir1.qq.com/qqfile/QQforMac/QQ_V3.1.1.dmg'
    # dowloading this master.zip file is a bug
    # url = 'https://github.com/openstack/nova/archive/master.zip'
    output = 'download.file'
    paxel(url, output, blocks=4, proxies={})
#!/usr/bin/python
# -*- coding: utf-8 -*-

'''
Created on Oct 20, 2013
@summary: geography info about an IP address
@author: Jay http://smilejay.com/
'''

import json
import urllib2
import re
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities


class location_freegeoip():
    '''
    IP -> location lookup backed by the freegeoip.net JSON API.
    '''

    def __init__(self, ip):
        '''
        Constructor of location_freegeoip class
        '''
        self.ip = ip
        self.api_format = 'json'
        self.api_url = 'http://freegeoip.net/%s/%s' % (self.api_format, self.ip)

    def get_geoinfo(self):
        """Fetch the remote API response and decode it into a dict."""
        resp = urllib2.urlopen(self.api_url)
        payload = resp.read()
        return json.loads(payload, encoding='utf-8')

    def get_country(self):
        return self.get_geoinfo()['country_name']

    def get_region(self):
        return self.get_geoinfo()['region_name']

    def get_city(self):
        return self.get_geoinfo()['city']


class location_taobao():
    '''
    IP -> location lookup backed by Taobao's getIpInfo JSON API,
    e.g. http://ip.taobao.com/service/getIpInfo.php?ip=112.111.184.63
    '''

    def __init__(self, ip):
        self.ip = ip
        self.api_url = 'http://ip.taobao.com/service/getIpInfo.php?ip=%s' % self.ip

    def get_geoinfo(self):
        """Fetch and decode the API response; return its 'data' dict."""
        resp = urllib2.urlopen(self.api_url)
        payload = resp.read()
        return json.loads(payload, encoding='utf-8')['data']

    def get_country(self):
        return self.get_geoinfo()[u'country']

    def get_region(self):
        return self.get_geoinfo()['region']

    def get_city(self):
        return self.get_geoinfo()['city']

    def get_isp(self):
        return self.get_geoinfo()['isp']


class location_qq():
    '''
    IP -> location lookup scraped from Tencent's search page.
    Note: the content of the Tencent's API return page is encoded
    by 'gb2312', e.g.
    http://ip.qq.com/cgi-bin/searchip?searchip1=112.111.184.64
    '''

    def __init__(self, ip):
        self.ip = ip
        self.api_url = 'http://ip.qq.com/cgi-bin/searchip?searchip1=%s' % ip

    def get_geoinfo(self):
        """Return [region, isp] parsed from the page, or None if absent."""
        resp = urllib2.urlopen(self.api_url)
        html = resp.read().decode('gb2312').encode('utf8')
        pattern = re.compile(r'该IP所在地为:(.+)')
        m = re.search(pattern, html)
        if m is None:
            return None
        return m.group(1).split(' ')

    def get_region(self):
        return self.get_geoinfo()[0]

    def get_isp(self):
        return self.get_geoinfo()[1]


class location_ipdotcn():
    '''
    IP -> location lookup via www.ip.cn; the page builds its result with
    JavaScript, so PhantomJS is used to render it first.
    '''

    def __init__(self, ip):
        self.ip = ip
        self.api_url = 'http://www.ip.cn/%s' % ip

    def get_geoinfo(self):
        """Return [region, isp] scraped from the rendered result div."""
        dcap = dict(DesiredCapabilities.PHANTOMJS)
        dcap["phantomjs.page.settings.userAgent"] = (
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:25.0) Gecko/20100101 Firefox/29.0 ")
        driver = webdriver.PhantomJS(
            executable_path='/usr/local/bin/phantomjs',
            desired_capabilities=dcap)
        driver.get(self.api_url)
        text = driver.find_element_by_xpath('//div[@id="result"]/div/p').text
        res = text.split('来自:')[1].split(' ')
        driver.quit()
        return res

    def get_region(self):
        return self.get_geoinfo()[0]

    def get_isp(self):
        return self.get_geoinfo()[1]


if __name__ == '__main__':
    ip = '110.84.0.129'
    iploc = location_ipdotcn(ip)
    print(iploc.get_region())
    print(iploc.get_isp())
#############################################################################
#
# multiple threads download module starts here
#
#############################################################################
class HttpGetThread(threading.Thread):
    """One worker thread: download a byte range of `url` into `filename`,
    resuming from whatever is already on disk, retrying up to 10 times."""

    def __init__(self, name, url, filename, range=0):
        threading.Thread.__init__(self, )
        self.url = url
        self.filename = filename
        self.range = range
        self.totalLength = range[1] - range[0] + 1
        try:
            self.downloaded = os.path.getsize(self.filename)
        except OSError:
            self.downloaded = 0
        self.percent = self.downloaded / float(self.totalLength) * 100
        self.headerrange = (self.range[0] + self.downloaded, self.range[1])
        self.bufferSize = 8192

    def run(self):
        try:
            self.downloaded = os.path.getsize(self.filename)
        except OSError:
            self.downloaded = 0
        self.percent = self.downloaded / float(self.totalLength) * 100
        self.bufferSize = 8192
        downloadAll = False
        retries = 1
        while not downloadAll:
            if retries > 10:
                break
            try:
                # recompute the resume point on every retry
                self.headerrange = (self.range[0] + self.downloaded, self.range[1])
                request = urllib2.Request(self.url)
                request.add_header('Range', 'bytes=%d-%d' % self.headerrange)
                conn = urllib2.urlopen(request)
                startTime = time.time()
                data = conn.read(self.bufferSize)
                while data:
                    f = open(self.filename, 'ab')
                    f.write(data)
                    f.close()
                    self.time = int(time.time() - startTime)
                    self.downloaded += len(data)
                    self.percent = self.downloaded / float(self.totalLength) * 100
                    data = conn.read(self.bufferSize)
                downloadAll = True
            except Exception:
                # py3-compatible form of the py2-only `except Exception, err`
                retries += 1
                time.sleep(1)
                continue


def Split(size, blocks):
    """Split `size` bytes into `blocks` contiguous (start, end) ranges;
    the last range absorbs the remainder.  Floor division keeps the
    result identical on Python 2 and Python 3."""
    ranges = []
    blocksize = size // blocks
    for i in range(blocks - 1):
        ranges.append((i * blocksize, i * blocksize + blocksize - 1))
    ranges.append((blocksize * (blocks - 1), size - 1))

    return ranges


def GetHttpFileSize(url):
    """Return the Content-Length of `url`, or 0 when unavailable."""
    length = 0
    try:
        conn = urllib.urlopen(url)
        headers = conn.info().headers
        for header in headers:
            if header.find('Length') != -1:
                length = header.split(':')[-1].strip()
                length = int(length)
    except Exception:
        pass

    return length


def hasLive(ts):
    """True while any of the worker threads is still alive."""
    for t in ts:
        if t.isAlive():
            return True
    return False


def MyHttpGet(url, output=None, connections=4):
    """
    arguments:
        url, in GBK encoding
        output, target file name; defaults to the last path component of url
        connections, integer number of parallel range requests
    """
    length = GetHttpFileSize(url)
    print(length)
    mb = length // 1024 / 1024.0
    if length == 0:
        raise URLUnreachable
    blocks = connections
    if output:
        filename = output
    else:
        output = url.split('/')[-1]
        # BUG FIX: `filename` was only assigned when `output` was given,
        # causing a NameError at the merge step in the default case.
        filename = output
    ranges = Split(length, blocks)
    names = ["%s_%d" % (output, i) for i in range(blocks)]

    ts = []
    for i in range(blocks):
        t = HttpGetThread(i, url, names[i], ranges[i])
        t.setDaemon(True)
        t.start()
        ts.append(t)

    live = hasLive(ts)
    startSize = sum(t.downloaded for t in ts)
    startTime = time.time()
    etime = 0
    while live:
        try:
            etime = time.time() - startTime
            d = sum(t.downloaded for t in ts) / float(length) * 100
            downloadedThistime = sum(t.downloaded for t in ts) - startSize
            try:
                rate = downloadedThistime / float(etime) / 1024
            except Exception:
                rate = 100.0
            progressStr = u'\rFilesize: %d(%.2fM) Downloaded: %.2f%% Avg rate: %.1fKB/s' % (length, mb, d, rate)
            sys.stdout.write(progressStr)
            sys.stdout.flush()
            live = hasLive(ts)
            time.sleep(0.2)
        except KeyboardInterrupt:
            print("")
            print("Exit...")
            for n in names:
                try:
                    os.remove(n)
                except OSError:
                    pass
            sys.exit(1)

    print("")

    # stitch the part files together, then clean them up
    with open(filename, 'wb') as f:
        for n in names:
            with open(n, 'rb') as part:
                f.write(part.read())
            try:
                os.remove(n)
            except OSError:
                pass


if __name__ == '__main__':
    MyHttpGet('http://dldir1.qq.com/qqfile/QQforMac/QQ_V3.1.1.dmg', 'my_download.file', 4)
# Sample data directory (paths recorded on a Mac OS X system).
log_dir = '/Users/jay/workspace/python_study/data/httpd'
default_file = '/Users/jay/workspace/python_study/data/httpd/access_log'


def get_top_n_ip(log_file=default_file, top_n=20):
    """List the top n visitor IPs in one access_log file.

    The client IP is the first whitespace-separated column of the
    Apache common/combined log format.

    Returns a list of (ip, count) tuples such as ('110.84.0.129', 9999),
    ordered by count descending; by default the top 20.
    """
    with open(log_file) as access_log:
        # split(None, 1) only splits off the first column; skip blank
        # lines, which would otherwise raise IndexError.
        ip_list = [line.split(None, 1)[0] for line in access_log if line.strip()]
    # groupby needs equal keys adjacent, so sort first.  Plain sort()
    # replaces the Python2-only sort(cmp=None, ...) call.
    ip_list.sort()
    ip_dict = dict((ip, len(list(grp))) for ip, grp in groupby(ip_list))
    # items() instead of Python2-only iteritems().
    return nlargest(top_n, ip_dict.items(), key=itemgetter(1))


def get_access_logs(file_dir=log_dir):
    """List the access_log* files in a directory.

    Returns the matching file names with their directory prefix, in
    sorted (deterministic) order.  glob.glob already joins the
    directory, unlike the undocumented glob.glob1 helper.
    """
    return sorted(glob.glob('%s/access_log*' % file_dir))


def remove_localhost_ip(ip_dict):
    """Drop the IPv6 localhost entry ('::1') from ip_dict, if present.

    Mutates ip_dict in place and returns it for convenience.  The
    default argument to pop() avoids a KeyError when no localhost
    requests appear in the logs.
    """
    ip_dict.pop('::1', None)
    return ip_dict


def merge_top_n_ips(file_dir=log_dir, top_n=50):
    """Merge the per-file top-20 IPs of every access_log in file_dir.

    For an IP appearing in several files' top-20 lists, its counts are
    summed.  Returns (ip, total_count) tuples ordered by total count
    descending; by default the top 50.
    """
    # Accumulate in a single dict: one linear pass instead of the
    # former triple nested loop over files x entries x unique IPs.
    top_ip_dict = {}
    for log_file in get_access_logs(file_dir):
        for ip, count in get_top_n_ip(log_file, 20):
            top_ip_dict[ip] = top_ip_dict.get(ip, 0) + count
    remove_localhost_ip(top_ip_dict)
    return nlargest(top_n, top_ip_dict.items(), key=itemgetter(1))


def get_top_ip_prefix(top_ip_list):
    """Aggregate (ip, pv) tuples by the first two IP octets.

    Returns [prefix, total_pv, ip_count] lists sorted by total_pv
    descending.  A prefix shared by many hot IPs usually marks a spam
    visitor's address pool, so it deserves extra attention.
    """
    totals = {}  # prefix -> summed page views
    counts = {}  # prefix -> number of top IPs sharing the prefix
    for ip, pv in top_ip_list:
        prefix = '.'.join(ip.split('.')[:2])
        totals[prefix] = totals.get(prefix, 0) + pv
        counts[prefix] = counts.get(prefix, 0) + 1
    prefix_pv_count_list = [[p, totals[p], counts[p]] for p in totals]
    return sorted(prefix_pv_count_list, key=itemgetter(1), reverse=True)


def show_top_n_ip_geo():
    """Print the merged top-50 IPs and their prefixes with geo info."""
    top_ip_list = merge_top_n_ips(log_dir)
    ipprefix_pv_count_list = get_top_ip_prefix(top_ip_list)
    print('-----------top 50 ip and its pv -----------')
    for i in top_ip_list:
        # location_taobao comes from the sibling iplocation module.
        geoinfo = location_taobao(i[0]).get_region()
        print('%s %s %s' % (i[0], geoinfo, i[1]))
    print('')
    print('---------- top ip-prefix and its pv --------')
    for j in ipprefix_pv_count_list:
        # Pad the 2-octet prefix to a full dotted quad for the lookup.
        geoinfo = location_taobao('%s.0.0' % j[0]).get_region()
        print('%s %s %s %d' % (j[0], geoinfo, j[1], j[2]))
geoinfo = location_taobao(i[0]).get_region() 128 | print('%s %s %s' % (i[0], geoinfo, i[1])) 129 | print('') 130 | print('---------- top ip-prefix and its pv --------') 131 | for j in ipprefix_pv_count_list: 132 | geoinfo = location_taobao('%s.0.0' % j[0]).get_region() 133 | print('%s %s %s %d' % (j[0], geoinfo, j[1], j[2])) 134 | 135 | if __name__ == '__main__': 136 | # get_top_20_ip() 137 | # get_access_logs() 138 | # merge_top_n_ips(log_dir, 50) 139 | top_ip_list = [('110.84.0.129', 118926), ('175.44.13.57', 30833), ('175.44.55.67', 30532), \ 140 | ('175.44.32.161', 30455), ('112.111.184.64', 30206), ('218.6.2.177', 24958), \ 141 | ('117.26.255.120', 24310), ('220.161.103.41', 23361), ('112.111.172.250', 23009), \ 142 | ('175.44.8.176', 22990), ('27.153.128.68', 19397), ('59.58.138.190', 18834), \ 143 | ('59.58.138.184', 17083), ('220.161.103.87', 16560), ('120.33.241.180', 16035), \ 144 | ('175.44.13.183', 15905), ('220.161.103.211', 15516), ('117.26.73.89', 14370), \ 145 | ('58.22.74.52', 12634), ('175.44.14.172', 12275), ('175.44.19.36', 11490), \ 146 | ('112.111.183.215', 10790), ('175.42.94.30', 10518), ('36.248.160.186', 9962), \ 147 | ('175.42.95.186', 9696), ('112.111.187.190', 9688), ('175.42.94.90', 9314)] 148 | # print len(top_ip_list) 149 | # get_top_ip_prefix(top_ip_list) 150 | show_top_n_ip_geo() -------------------------------------------------------------------------------- /py2016/SimpleHTTPServerWithUpload.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """Simple HTTP Server With Upload. 4 | 5 | https://github.com/tualatrix/tools/blob/master/SimpleHTTPServerWithUpload.py 6 | 7 | This module builds on BaseHTTPServer by implementing the standard GET 8 | and HEAD requests in a fairly straightforward manner. 
9 | 10 | """ 11 | 12 | 13 | import os 14 | import posixpath 15 | import BaseHTTPServer 16 | import urllib 17 | import cgi 18 | import shutil 19 | import mimetypes 20 | import re 21 | 22 | __version__ = "0.1" 23 | __all__ = ["SimpleHTTPRequestHandler"] 24 | __author__ = "bones7456" 25 | __home_page__ = "http://li2z.cn/" 26 | 27 | try: 28 | from cStringIO import StringIO 29 | except ImportError: 30 | from StringIO import StringIO 31 | 32 | 33 | class SimpleHTTPRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler): 34 | 35 | """Simple HTTP request handler with GET/HEAD/POST commands. 36 | 37 | This serves files from the current directory and any of its 38 | subdirectories. The MIME type for files is determined by 39 | calling the .guess_type() method. And can reveive file uploaded 40 | by client. 41 | 42 | The GET/HEAD/POST requests are identical except that the HEAD 43 | request omits the actual contents of the file. 44 | 45 | """ 46 | 47 | server_version = "SimpleHTTPWithUpload/" + __version__ 48 | 49 | def do_GET(self): 50 | """Serve a GET request.""" 51 | f = self.send_head() 52 | if f: 53 | self.copyfile(f, self.wfile) 54 | f.close() 55 | 56 | def do_HEAD(self): 57 | """Serve a HEAD request.""" 58 | f = self.send_head() 59 | if f: 60 | f.close() 61 | 62 | def do_POST(self): 63 | """Serve a POST request.""" 64 | r, info = self.deal_post_data() 65 | print r, info, "by: ", self.client_address 66 | f = StringIO() 67 | f.write('') 68 | f.write("\nUpload Result Page\n") 69 | f.write("\n

Upload Result Page

\n") 70 | f.write("
\n") 71 | if r: 72 | f.write("Success:") 73 | else: 74 | f.write("Failed:") 75 | f.write(info) 76 | f.write("
back" % self.headers['referer']) 77 | f.write("
Powered By: bones7456, check new version at ") 78 | f.write("") 79 | f.write("here.\n\n") 80 | length = f.tell() 81 | f.seek(0) 82 | self.send_response(200) 83 | self.send_header("Content-type", "text/html") 84 | self.send_header("Content-Length", str(length)) 85 | self.end_headers() 86 | if f: 87 | self.copyfile(f, self.wfile) 88 | f.close() 89 | 90 | def deal_post_data(self): 91 | boundary = self.headers.plisttext.split("=")[1] 92 | remainbytes = int(self.headers['content-length']) 93 | line = self.rfile.readline() 94 | remainbytes -= len(line) 95 | if boundary not in line: 96 | return (False, "Content NOT begin with boundary") 97 | line = self.rfile.readline() 98 | remainbytes -= len(line) 99 | fn = re.findall(r'Content-Disposition.*name="file"; filename="(.*)"', line) 100 | if not fn: 101 | return (False, "Can't find out file name...") 102 | path = self.translate_path(self.path) 103 | fn = os.path.join(path, fn[0]) 104 | while os.path.exists(fn): 105 | fn += "_" 106 | line = self.rfile.readline() 107 | remainbytes -= len(line) 108 | line = self.rfile.readline() 109 | remainbytes -= len(line) 110 | try: 111 | out = open(fn, 'wb') 112 | except IOError: 113 | return (False, "Can't create file to write, do you have permission to write?") 114 | 115 | preline = self.rfile.readline() 116 | remainbytes -= len(preline) 117 | while remainbytes > 0: 118 | line = self.rfile.readline() 119 | remainbytes -= len(line) 120 | if boundary in line: 121 | preline = preline[0:-1] 122 | if preline.endswith('\r'): 123 | preline = preline[0:-1] 124 | out.write(preline) 125 | out.close() 126 | return (True, "File '%s' upload success!" % fn) 127 | else: 128 | out.write(preline) 129 | preline = line 130 | return (False, "Unexpect Ends of data.") 131 | 132 | def send_head(self): 133 | """Common code for GET and HEAD commands. 134 | 135 | This sends the response code and MIME headers. 
136 | 137 | Return value is either a file object (which has to be copied 138 | to the outputfile by the caller unless the command was HEAD, 139 | and must be closed by the caller under all circumstances), or 140 | None, in which case the caller has nothing further to do. 141 | 142 | """ 143 | path = self.translate_path(self.path) 144 | f = None 145 | if os.path.isdir(path): 146 | if not self.path.endswith('/'): 147 | # redirect browser - doing basically what apache does 148 | self.send_response(301) 149 | self.send_header("Location", self.path + "/") 150 | self.end_headers() 151 | return None 152 | for index in "index.html", "index.htm": 153 | index = os.path.join(path, index) 154 | if os.path.exists(index): 155 | path = index 156 | break 157 | else: 158 | return self.list_directory(path) 159 | ctype = self.guess_type(path) 160 | try: 161 | # Always read in binary mode. Opening files in text mode may cause 162 | # newline translations, making the actual size of the content 163 | # transmitted *less* than the content-length! 164 | f = open(path, 'rb') 165 | except IOError: 166 | self.send_error(404, "File not found") 167 | return None 168 | self.send_response(200) 169 | self.send_header("Content-type", ctype) 170 | fs = os.fstat(f.fileno()) 171 | self.send_header("Content-Length", str(fs[6])) 172 | self.send_header("Last-Modified", self.date_time_string(fs.st_mtime)) 173 | self.end_headers() 174 | return f 175 | 176 | def list_directory(self, path): 177 | """Helper to produce a directory listing (absent index.html). 178 | 179 | Return value is either a file object, or None (indicating an 180 | error). In either case, the headers are sent, making the 181 | interface the same as for send_head(). 
182 | 183 | """ 184 | try: 185 | list = os.listdir(path) 186 | except os.error: 187 | self.send_error(404, "No permission to list directory") 188 | return None 189 | list.sort(key=lambda a: a.lower()) 190 | f = StringIO() 191 | displaypath = cgi.escape(urllib.unquote(self.path)) 192 | f.write('') 193 | f.write("\nDirectory listing for %s\n" % displaypath) 194 | f.write("\n

Directory listing for %s

\n" % displaypath) 195 | f.write("
\n") 196 | f.write("
") 197 | f.write("") 198 | f.write("
\n") 199 | f.write("
\n
    \n") 200 | for name in list: 201 | fullname = os.path.join(path, name) 202 | displayname = linkname = name 203 | # Append / for directories or @ for symbolic links 204 | if os.path.isdir(fullname): 205 | displayname = name + "/" 206 | linkname = name + "/" 207 | if os.path.islink(fullname): 208 | displayname = name + "@" 209 | # Note: a link to a directory displays with @ and links with / 210 | f.write('
  • %s\n' 211 | % (urllib.quote(linkname), cgi.escape(displayname))) 212 | f.write("
\n
\n\n\n") 213 | length = f.tell() 214 | f.seek(0) 215 | self.send_response(200) 216 | self.send_header("Content-type", "text/html") 217 | self.send_header("Content-Length", str(length)) 218 | self.end_headers() 219 | return f 220 | 221 | def translate_path(self, path): 222 | """Translate a /-separated PATH to the local filename syntax. 223 | 224 | Components that mean special things to the local file system 225 | (e.g. drive or directory names) are ignored. (XXX They should 226 | probably be diagnosed.) 227 | 228 | """ 229 | # abandon query parameters 230 | path = path.split('?', 1)[0] 231 | path = path.split('#', 1)[0] 232 | path = posixpath.normpath(urllib.unquote(path)) 233 | words = path.split('/') 234 | words = filter(None, words) 235 | path = os.getcwd() 236 | for word in words: 237 | drive, word = os.path.splitdrive(word) 238 | head, word = os.path.split(word) 239 | if word in (os.curdir, os.pardir): 240 | continue 241 | path = os.path.join(path, word) 242 | return path 243 | 244 | def copyfile(self, source, outputfile): 245 | """Copy all data between two file objects. 246 | 247 | The SOURCE argument is a file object open for reading 248 | (or anything with a read() method) and the DESTINATION 249 | argument is a file object open for writing (or 250 | anything with a write() method). 251 | 252 | The only reason for overriding this would be to change 253 | the block size or perhaps to replace newlines by CRLF 254 | -- note however that this the default server uses this 255 | to copy binary data as well. 256 | 257 | """ 258 | shutil.copyfileobj(source, outputfile) 259 | 260 | def guess_type(self, path): 261 | """Guess the type of a file. 262 | 263 | Argument is a PATH (a filename). 264 | 265 | Return value is a string of the form type/subtype, 266 | usable for a MIME Content-type header. 
267 | 268 | The default implementation looks the file's extension 269 | up in the table self.extensions_map, using application/octet-stream 270 | as a default; however it would be permissible (if 271 | slow) to look inside the data to make a better guess. 272 | 273 | """ 274 | 275 | base, ext = posixpath.splitext(path) 276 | if ext in self.extensions_map: 277 | return self.extensions_map[ext] 278 | ext = ext.lower() 279 | if ext in self.extensions_map: 280 | return self.extensions_map[ext] 281 | else: 282 | return self.extensions_map[''] 283 | 284 | if not mimetypes.inited: 285 | mimetypes.init() # try to read system mime.types 286 | extensions_map = mimetypes.types_map.copy() 287 | extensions_map.update({ 288 | '': 'application/octet-stream', # Default 289 | '.py': 'text/plain', 290 | '.c': 'text/plain', 291 | '.h': 'text/plain', 292 | }) 293 | 294 | 295 | def test(HandlerClass=SimpleHTTPRequestHandler, 296 | ServerClass=BaseHTTPServer.HTTPServer): 297 | BaseHTTPServer.test(HandlerClass, ServerClass) 298 | 299 | if __name__ == '__main__': 300 | test() 301 | -------------------------------------------------------------------------------- /py2014/poi.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | ''' 4 | Created on Apr 18, 2014 5 | 6 | @author: Jay 7 | ''' 8 | 9 | import sys 10 | sys.path.append("..") 11 | import lib.file2list as file2list 12 | import lib.db_connection as db_connection 13 | from config import dianping_db_config 14 | import random 15 | import urllib 16 | import json 17 | import os 18 | from xml.dom import minidom 19 | import codecs 20 | 21 | 22 | class poi(object): 23 | ''' 24 | POI class for the QA dashboard. 
25 | ''' 26 | 27 | poi_xml = 'poi.xml' 28 | poi_with_search_xml = 'poi_with_search.xml' 29 | 30 | def __init__(self, cityfile='cities.txt', shoptype='10', sample_cnt=10, searcher_host='192.168.5.149:4053'): 31 | ''' 32 | Constructor: 33 | cityfile='cities.txt' # for city list file 34 | shoptype='10' # for '美食' category 35 | sample_cnt=10 # how many samples will be collected 36 | searcher_host # the host and port of the shop search engine 37 | QA env: searcher_host='192.168.5.149:4053' 38 | Product env: searcher_host='10.1.1.158:4053' 39 | ''' 40 | 41 | self.myfile = cityfile 42 | self.shoptype = shoptype 43 | self.sample_cnt = sample_cnt 44 | self.city_list = file2list.list_from_file(cityfile) 45 | # self.city_list = ['长沙'] #just for debugging 46 | self.searcher_host = searcher_host 47 | 48 | def get_poi_info(self): 49 | ''' 50 | get random POI samples from the initiated city list. 51 | ''' 52 | 53 | poi_info_list = [] 54 | cnx = db_connection.connect_db(**dianping_db_config) 55 | if cnx == None: 56 | print "DianPing DB connection ERROR!!" 
57 | sys.exit(1) 58 | cursor = cnx.cursor() 59 | for city in self.city_list: 60 | sql1 = "SELECT shop.ShopID FROM ZS_CityList as cl, DP_Shop as shop WHERE cl.CityID=shop.CityID AND ShopType='%s' " % self.shoptype \ 61 | + "AND cl.CityName='%s' AND shop.GLat is not null AND shop.Glng is not null AND shop.Glat!=0 AND shop.Glng!=0 " % city \ 62 | + "AND (shop.Power IN(0, 2, 3, 4, 5, 10) OR (shop.Power=1 AND shop.LastDate >= '2013-07-01' )) " \ 63 | + "AND shop.shopid NOT IN (SELECT ShopID FROM DP_ClosedShop)" \ 64 | + "AND shop.ShopID NOT IN (SELECT ShopID FROM DP_ShopAutoAudit WHERE STATUS<>2);" 65 | cursor.execute(sql1) 66 | rows = cursor.fetchall() 67 | # print rows.__sizeof__() 68 | if len(rows) >= self.sample_cnt: 69 | shopid_list = [row[0] for row in rows] 70 | new_shopid_list = random.sample(shopid_list, self.sample_cnt) 71 | # print new_shopid_list 72 | # print shopid_list.__sizeof__() 73 | for id in new_shopid_list: 74 | temp_list = [] 75 | sql2 = "SELECT shop.ShopID, shop.ShopName, shop.GLng, shop.GLat, cl.CityID, cl.CityName FROM ZS_CityList as cl, DP_Shop as shop "\ 76 | + "WHERE cl.CityID=shop.CityID AND shop.ShopID='%s' LIMIT 1;" % str(id) 77 | cursor.execute(sql2) 78 | rows = cursor.fetchone() 79 | for i in rows: 80 | temp_list.append(i) 81 | poi_info_list.append(temp_list) 82 | return poi_info_list 83 | 84 | def poi_info_to_xml(self, poi_info_list=None): 85 | ''' 86 | save poi_info_list to a XML file. 
87 | ''' 88 | 89 | impl = minidom.getDOMImplementation() 90 | dom = impl.createDocument(None, 'poi', None) 91 | root = dom.documentElement 92 | 93 | for i in poi_info_list: 94 | shop = dom.createElement('shop') 95 | shop.setAttribute('id', str(i[0]) ) 96 | shopname = dom.createElement('shopname') 97 | shopname_text = dom.createTextNode(i[1]) 98 | shopname.appendChild(shopname_text) 99 | lng = dom.createElement('longitude') 100 | lng_text = dom.createTextNode(str(i[2])) 101 | lng.appendChild(lng_text) 102 | lat = dom.createElement('latitude') 103 | lat_text = dom.createTextNode(str(i[3])) 104 | lat.appendChild(lat_text) 105 | cityid = dom.createElement('cityid') 106 | cityid_text = dom.createTextNode(str(i[4])) 107 | cityid.appendChild(cityid_text) 108 | cityname = dom.createElement('cityname') 109 | cityname_text = dom.createTextNode(i[5]) 110 | cityname.appendChild(cityname_text) 111 | imgname = dom.createElement('imgname') 112 | imgname_text = dom.createTextNode('%s_%s.jpg' % (i[4], i[0])) 113 | imgname.appendChild(imgname_text) 114 | 115 | shop.appendChild(shopname) 116 | shop.appendChild(lng) 117 | shop.appendChild(lat) 118 | shop.appendChild(cityid) 119 | shop.appendChild(cityname) 120 | shop.appendChild(imgname) 121 | 122 | root.appendChild(shop) 123 | 124 | f = codecs.open(self.poi_xml, 'w', 'utf-8') 125 | dom.writexml(f, addindent=' ', newl='\n') 126 | f.close() 127 | 128 | def xml_to_poi_info_list (self): 129 | ''' 130 | load xml to a poi_info_list. 
131 | ''' 132 | 133 | poi_info_list = [] 134 | xmldoc = minidom.parse(self.poi_xml) 135 | shops = xmldoc.getElementsByTagName('shop') 136 | for shop in shops: 137 | shopid = shop.getAttribute('id') 138 | shopname = shop.getElementsByTagName('shopname')[0].childNodes[0].nodeValue 139 | lng = shop.getElementsByTagName('longitude')[0].childNodes[0].nodeValue 140 | lat = shop.getElementsByTagName('latitude')[0].childNodes[0].nodeValue 141 | cityid = shop.getElementsByTagName('cityid')[0].childNodes[0].nodeValue 142 | cityname = shop.getElementsByTagName('cityname')[0].childNodes[0].nodeValue 143 | poi_info_list.append([shopid, shopname, lng, lat, cityid, cityname]) 144 | return poi_info_list 145 | 146 | def get_search_info(self, poi_info=None): 147 | ''' 148 | poi_info must be a list as [shopid, shopname, longitude, latitude, citiyid, cityname] 149 | e.g. [9125655, '龙马川菜馆', 119.940181, 31.969895, 93, '常州'] 150 | ''' 151 | 152 | searcher_host = self.searcher_host 153 | tmpfile = 's.out' 154 | distant = 1000 # 1000m nearby 155 | query_url = "http://%s/search/shop?query=term(categoryids,10),geo(poi,%s:%s,%s)&sort=desc(dpscore)&" % (searcher_host, poi_info[2], poi_info[3], distant) \ 156 | + "fl=dist(poi,%s:%s),shopid,shopname,shoppower,branchname,altname,power," % (poi_info[2], poi_info[3]) \ 157 | + "shopgroupid,shoptype,cityid,defaultpic,avgprice,hasgroupuid,address,crossroad,score1,score2,score2,dishtags,pictotal,hasbooksetting,"\ 158 | + "dealgroupid,membercardid,votetotal,hasmembercard,booktype,dealgrouptitle,dealgroupprice,contenttitle,hasticket,region1,"\ 159 | + "category1,gpoi,isnewshop,shoptags,publictransit,haspromo,prepaidcards,hastakeaway,hasshortdeals,marketprice,poi,"\ 160 | + "mopaystatus&limit=0,25&info=clientversion:6.5,app:PointShopSearch,platform:MAPI," \ 161 | + "referrequestid:013d92a7-a7eb-45b9-8534-5ccd0c176b1a,"\ 162 | + "useragent:MApi+1.1+%%28com.dianping.v1+6.5+om_sd_xiaomishichang+MI_3W%%3B+Android+4.3%%29,userlng:121.41542,"\ 163 | + 
"queryid:f1efe724-9fb4-4e50-954a-e700f3c5e5a5,clientip:192.168.213.52,userlat:31.21746,sorttype:1,"\ 164 | + "unrelatedguidefields:categoryids%%3Bregionids,geoFieldName:poi,mobileplatform:1,poi:%s%%A%s," % (poi_info[2], poi_info[3]) \ 165 | + "wifi:,dpid:-7517157582792622873,requestid:5734f0bd-e4f6-4e0a-8fbc-622659b6e2e2,mobiledeviceid:863360024707550,userip:210.22.122.2" 166 | # print query_url 167 | # query_url = "http://192.168.5.149:4053/search/shop?query=geo%28poi,113.0231:28.19991,1000%29&sort=desc%28dpscore%29&fl=dist%28poi,113.0231:28.19991%29,shopid,shopname,shoppower,branchname,altname,power,shopgroupid,shoptype,cityid,defaultpic,avgprice,phone,address,crossroad,score1,score2,score2,dishtags,pictotal,hasbooksetting,dealgroupid,membercardid,membercardtitle,smspromoid,booktype,dealgrouptitle,dealgroupprice,contenttitle,hasticket,region1,category1,gpoi,isnewshop,shoptags,publictransit,businesshours,prepaidcards,hastakeaway,hasshortdeals,marketprice,poi,mopaystatus&limit=0,25&info=clientversion:6.5,app:PointShopSearch,platform:MAPI,referrequestid:013d92a7-a7eb-45b9-8534-5ccd0c176b1a,useragent:MApi+1.1+%28com.dianping.v1+6.5+om_sd_xiaomishichang+MI_3W;+Android+4.3%29,userlng:121.41542,queryid:f1efe724-9fb4-4e50-954a-e700f3c5e5a5,clientip:192.168.213.52,userlat:31.21746,sorttype:1,unrelatedguidefields:categoryids;regionids,geoFieldName:poi,mobileplatform:1,poi:113.0231:28.19991,wifi:,dpid:-7517157582792622873,requestid:5734f0bd-e4f6-4e0a-8fbc-622659b6e2e2,mobiledeviceid:863360024707550,userip:210.22.122.2" 168 | which_cmd = 'which wget' 169 | p = os.popen(which_cmd) 170 | abs_cmd = p.read().strip() 171 | mycmd = '%s -q "%s" -O %s' % (abs_cmd, query_url, tmpfile) 172 | os.system(mycmd) 173 | with open(tmpfile) as f: 174 | raw_data = json.loads(f.read()) 175 | return raw_data['records'] 176 | 177 | def parse_search_info(self, json_data=None, shopid=None): 178 | ''' 179 | parse the search info (records) and do some calculation, then return a dict. 
180 | ''' 181 | 182 | res = {'shop_total_cnt' : len(json_data), 183 | 'pos_index' : 0, 184 | 'has_pic_cnt' : 0, 185 | 'has_star_cnt' : 0, 186 | 'has_review_cnt' : 0, 187 | 'has_tg_cnt' : 0, 188 | 'has_rs_cnt' : 0, 189 | 'has_ta_cnt' : 0 190 | } 191 | for i, j in enumerate(json_data): 192 | if j['shopid'] == str(shopid): 193 | res['pos_index'] = i + 1 194 | if j['pictotal'] != "0": 195 | res['has_pic_cnt'] += 1 196 | if j['shoppower'] != "0": 197 | res['has_star_cnt'] += 1 198 | if j['votetotal'] != "0": 199 | res['has_review_cnt'] += 1 200 | if j['dealgroupid'] and j['dealgroupid'] != "0": 201 | res['has_tg_cnt'] += 1 202 | if j['hasbooksetting'] == "1": 203 | res['has_rs_cnt'] += 1 204 | if j['hastakeaway'] == "1": 205 | res['has_ta_cnt'] += 1 206 | 207 | return res 208 | 209 | def add_search_info_to_xml(self): 210 | ''' 211 | add the search result to the POI xml file (currently I create a new XML file). 212 | ''' 213 | 214 | poi_info_list = self.xml_to_poi_info_list() 215 | search_res_list = [] 216 | for i in poi_info_list: 217 | search_info = self.get_search_info(i) 218 | search_res_list.append(self.parse_search_info(search_info, i[0])) 219 | xmldoc = minidom.parse(self.poi_xml) 220 | shops = xmldoc.getElementsByTagName('shop') 221 | for res, shop in zip(search_res_list, shops): 222 | search = xmldoc.createElement('search') 223 | for k in res: 224 | ele = xmldoc.createElement(k) 225 | ele_text = xmldoc.createTextNode(str(res[k])) 226 | ele.appendChild(ele_text) 227 | search.appendChild(ele) 228 | shop.appendChild(search) 229 | # if we don't use codecs.open(..'utf-8'), for e.g. open(), we'll meet a issue that 230 | # some Chinese characters can't be writed to the file (ascii format). 
231 | f = codecs.open(self.poi_with_search_xml, 'w', 'utf-8') 232 | xmldoc.writexml(f, addindent=' ', newl='\n') 233 | f.close() 234 | 235 | 236 | if __name__ == '__main__': 237 | # poi = poi(searcher_host='10.1.1.158:4053') # for production env 238 | poi = poi() 239 | # poi_info_list = poi.get_poi_info() 240 | # poi.poi_info_to_xml(poi_info_list) 241 | poi.add_search_info_to_xml() --------------------------------------------------------------------------------