├── .gitignore ├── README.md ├── golang └── network │ ├── echo │ └── server.go │ ├── ip │ └── ip.go │ └── upload │ ├── client.go │ └── server.go ├── javascript ├── 0.num_2_thousand_seprator.js ├── array_separation.js ├── enhangced_map.js └── learn_javascript.js ├── node ├── 6vhao │ ├── index.js │ └── package.json └── baike │ ├── app.js │ ├── readme.md │ └── yuhuan.xlsx ├── paramiko ├── change_hostname.py ├── mget.py └── mscp.py ├── python ├── .gitignore ├── crawler │ ├── alexaTop500 │ │ ├── README.md │ │ ├── alexa.py │ │ └── requirements.txt │ └── pyaxel │ │ ├── pyaxel.py │ │ ├── readme.md │ │ └── requirements.txt ├── excel │ ├── huan.py │ └── huan.xlsx ├── file │ ├── alexa.py │ ├── count_filesize.py │ ├── dns_massive_resolve.py │ ├── lru.py │ └── tmpfile.py ├── network │ ├── domain-https-check.py │ ├── port_scan │ │ ├── basic.py │ │ ├── ccurt.py │ │ ├── gevent_scanner.py │ │ ├── gevt_pool.py │ │ ├── gevt_spawn.py │ │ ├── multiprocess.py │ │ └── multithread.py │ └── socket │ │ ├── asyncio_server.py │ │ ├── basic_server.py │ │ ├── client.py │ │ ├── epoll_server.py │ │ ├── gevent_server.py │ │ ├── multiprocessing_server.py │ │ ├── poll_server.py │ │ ├── select_server.py │ │ ├── selectors_server.py │ │ ├── socketserver_server.py │ │ └── threading_server.py ├── other │ ├── batch.py │ ├── count_appear_times.py │ └── random_strings.py ├── process │ ├── daemon.py │ └── sub_process.py ├── readme.md └── system │ └── hosts.py ├── script ├── README.md ├── change_hostname.py └── nginx ├── shell ├── init_centos7.sh ├── logrotate.sh ├── mysql_install.sh ├── nginx_install.sh ├── php-fpm └── python_install.sh └── software ├── README.md ├── axel-2.4-1.el6.rf.x86_64.rpm └── axel-2.4-9.el7.x86_64.rpm /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 25 | 26 | # PyInstaller 27 | # Usually these files are written by a python script from a template 28 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 29 | *.manifest 30 | *.spec 31 | 32 | # Installer logs 33 | pip-log.txt 34 | pip-delete-this-directory.txt 35 | 36 | # Unit test / coverage reports 37 | htmlcov/ 38 | .tox/ 39 | .coverage 40 | .coverage.* 41 | .cache 42 | nosetests.xml 43 | coverage.xml 44 | *,cover 45 | 46 | # Translations 47 | *.mo 48 | *.pot 49 | 50 | # Django stuff: 51 | *.log 52 | 53 | # Sphinx documentation 54 | docs/_build/ 55 | 56 | # PyBuilder 57 | target/ 58 | .DS* 59 | .idea/* 60 | 61 | # NodeJS 62 | node_modules/ 63 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Learning by Coding 2 | 3 | #### 学习途径 4 | 5 | * 官方文档 - 80%的问题都可以从官方文档中找到答案 6 | * github - 上面有你想要的一切,包括整理好的文档,优秀的代码,程序员的🏠 7 | * 博客 - 优秀程序员的博客应该经常去逛逛 8 | * 社交媒体 - 比如知乎,简书,微信公众号 9 | * 问答网站 - stackoverflow, segmentfault等 10 | 11 | #### 学习方法 12 | 13 | * 学习知识最快速的方法是去使用它,以真实的小问题为切入点不断深化扩充,并总结遇到的问题,比如学习requests,我们可以试着去写一个小爬虫. 14 | * 学习知识最重要的点是多思考,多总结,知识的广度是深度的副产品,这个观点我很认同,即使现在互联网这么发达,定期关闭网络,安静思考也是很重要的. 15 | * 输出是最好的输入,写博客,去问答网站回答问题是很好的学习方法,每一次解决一个大的问题都要理清思路并整理到博客里. 16 | * 自问自答,不断地给自己提出一些问题并去搜索印证答案输出到本博客. 
17 | -------------------------------------------------------------------------------- /golang/network/echo/server.go: -------------------------------------------------------------------------------- 1 | //TCPServer 2 | package main 3 | 4 | import ( 5 | "flag" 6 | "fmt" 7 | //"io" 8 | "net" 9 | "os" 10 | ) 11 | 12 | func main() { 13 | host := "0.0.0.0" 14 | port := flag.String("port", "8888", "监听端口") //指针类型, 通过*port来获取真实的值 15 | 16 | flag.Parse() 17 | 18 | listener, err := net.Listen("tcp4", host+":"+*port) 19 | if err != nil { 20 | fmt.Println("Error listening:", err) 21 | os.Exit(1) 22 | } 23 | defer listener.Close() 24 | fmt.Println("Server start at: " + host + ":" + *port) 25 | 26 | for { 27 | conn, err := listener.Accept() 28 | if err != nil { 29 | fmt.Println("Error accepting: ", err) 30 | continue 31 | } 32 | 33 | go handle(conn) 34 | 35 | } 36 | 37 | } 38 | 39 | func handle(conn net.Conn) { 40 | defer conn.Close() 41 | fmt.Printf("Received new connection: %s -> %s \n", conn.RemoteAddr(), conn.LocalAddr()) 42 | data := make([]byte, 1024*1024) 43 | for { 44 | length, _ := conn.Read(data) 45 | fmt.Println(data[:length]) 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /golang/network/ip/ip.go: -------------------------------------------------------------------------------- 1 | /* 2 | 使用方法: 3 | 1.把编译好的二进制文件放到$PATH中. 4 | 2. ip (不加参数) --> 返回本机IP的归属地, ip 8.8.8.8 , ip baidu.com 5 | */ 6 | 7 | package main 8 | 9 | import ( 10 | "fmt" 11 | "io/ioutil" 12 | "net" 13 | "net/http" 14 | "os" 15 | "regexp" 16 | "strings" 17 | "time" 18 | 19 | "github.com/PuerkitoBio/goquery" 20 | "github.com/axgle/mahonia" 21 | ) 22 | 23 | func ip138(ip string) { 24 | ip138_url := "http://www.ip138.com/ips1388.asp?ip=" + ip + "&action=2" 25 | 26 | client := &http.Client{Timeout: 5 * time.Second} 27 | 28 | req, err := http.NewRequest("GET", ip138_url, nil) 29 | req.Header.Set("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.87 Safari/537.36") 30 | if err != nil { 31 | fmt.Println(err) 32 | } 33 | 34 | res, err := client.Do(req) 35 | if err != nil { 36 | fmt.Println(err) 37 | } 38 | defer res.Body.Close() 39 | 40 | /* 41 | 把gb2312编码转成utf8编码字符串, 这里遇到了一个坑 42 | 就是使用https://github.com/djimenez/iconv-go的时 43 | 候, 转码后的内容会无故减少, 所以这里使用mahonia 44 | */ 45 | 46 | body_reader_gbk, _ := ioutil.ReadAll(res.Body) 47 | utf8_body := mahonia.NewDecoder("gbk").ConvertString(string(body_reader_gbk)) 48 | body_reader_utf8 := strings.NewReader(utf8_body) 49 | 50 | //最后把string转成io reader后传递给goquery 51 | doc, err := goquery.NewDocumentFromReader(body_reader_utf8) 52 | if err != nil { 53 | fmt.Println(err) 54 | } 55 | 56 | fmt.Println("********************ip138********************") 57 | 58 | doc.Find("body > table > tbody > tr:nth-child(3) > td > ul > li").Each(func(index int, selection *goquery.Selection) { 59 | result, _ := selection.Html() 60 | fmt.Println(result) 61 | }) 62 | } 63 | 64 | func ipcn(ip string) { 65 | var ipcn_url string 66 | if ip == "0.0.0.0" { 67 | ipcn_url = "http://ip.cn" 68 | } else { 69 | ipcn_url = "http://ip.cn/" + ip 70 | } 71 | client := &http.Client{Timeout: 5 * time.Second} 72 | 73 | req, err := http.NewRequest("GET", ipcn_url, nil) 74 | req.Header.Set("User-Agent", "curl/7.54.0") 75 | 76 | res, err := client.Do(req) 77 | if err != nil { 78 | fmt.Println(err) 79 | os.Exit(250) 80 | } 81 | defer res.Body.Close() 82 | 83 | body, _ := ioutil.ReadAll(res.Body) 84 | if err != nil { 85 | 
fmt.Println(err) 86 | os.Exit(250) 87 | } 88 | 89 | fmt.Println("********************ip.cn********************") 90 | fmt.Println(string(body)) 91 | 92 | } 93 | 94 | func main() { 95 | if len(os.Args) <= 1 { 96 | ip := "0.0.0.0" 97 | ipcn(ip) 98 | } else { 99 | 100 | ip := os.Args[1] 101 | 102 | match, _ := regexp.MatchString(`\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}`, ip) 103 | 104 | if match { 105 | ipcn(ip) 106 | ip138(ip) 107 | } else { 108 | ips, _ := net.LookupIP(os.Args[1]) 109 | if len(ips) != 0 { 110 | ip := ips[0].String() 111 | ipcn(ip) 112 | ip138(ip) 113 | } else { 114 | fmt.Println("请输入正确的IP地址或者域名!") 115 | } 116 | } 117 | 118 | } 119 | 120 | } 121 | -------------------------------------------------------------------------------- /golang/network/upload/client.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "net" 7 | "os" 8 | "strconv" 9 | "time" 10 | ) 11 | 12 | func main() { 13 | 14 | var ( 15 | host = "123.249.94.160" 16 | port = "8888" 17 | server_addr = host + ":" + port 18 | file_name string 19 | merge_file_name string 20 | coroutine int 21 | bufsize int 22 | total_size int64 23 | allocated_size int64 24 | ) 25 | 26 | if len(os.Args) <= 1 { 27 | fmt.Println("使用方法: up [文件名]") 28 | return 29 | } else { 30 | file_name = os.Args[1] //待发送文件名称 31 | merge_file_name = os.Args[1] //待合并文件名称(在服务器上的文件名称) 32 | } 33 | 34 | //根据文件大小的范围确定比较合适的协程数和buffer size 35 | fileobj, err := os.OpenFile(file_name, os.O_RDONLY, 0666) 36 | if err != nil { 37 | fmt.Println("打开文件出错: ", err) 38 | return 39 | } 40 | defer fileobj.Close() 41 | 42 | filestat, err := fileobj.Stat() //获取文件状态 43 | 44 | total_size = filestat.Size() //获取文件总大小 45 | //当文件小于1M的时候 46 | if total_size < 1048576 { 47 | coroutine = 1 //协程数量或拆分文件的数量 48 | bufsize = 1048576 //单次发送数据的大小 49 | } else if total_size > 1048576 && total_size < 10485760 { 50 | coroutine = 4 //协程数量或拆分文件的数量 51 | bufsize = 1024 * 4 //单次发送数据的大小 52 | } else { 53 | coroutine = 8 //协程数量或拆分文件的数量 54 | bufsize = 1024 * 10 //单次发送数据的大小 55 | } 56 | 57 | allocated_size = total_size / int64(coroutine) //每个协程要传输的文件大小 58 | 59 | fmt.Printf("文件总大小: %d, 每个协程分配的文件大小: %d\n", total_size, allocated_size) 60 | 61 | starttime := time.Now().Unix() //时间戳格式 62 | //对待发送文件进行拆分计算并调用发送方法 63 | ch := make(chan string) 64 | var start_pos int64 = 0 65 | for coroutine_num := 0; coroutine_num < coroutine; coroutine_num++ { 66 | //server_addr: 远端地址 67 | //ch: 管道, 用于同步协程 68 | //coroutine_num: 协程序号 69 | //bufsize: socket单次发送数据块的大小 70 | //file_name: 客户端上传文件的名字 71 | //merge_file_name: 服务端重命名文件名称 72 | //start_pos: 当前协程开始的位置, 每次启动一个新的协程后, start的指针会向后移动 73 | //start+littleSize当前协程的结束位置, 如果是最后一个协程的话就把结束位置置为文件的最后, 也就是size 74 | if coroutine_num == coroutine-1 { 75 | go sendFile(server_addr, ch, coroutine_num, bufsize, file_name, merge_file_name, start_pos, total_size) 76 | fmt.Println(start_pos, total_size, bufsize) 77 | } else { 78 | go sendFile(server_addr, ch, coroutine_num, bufsize, file_name, merge_file_name, start_pos, start_pos+allocated_size) 79 | fmt.Println(start_pos, start_pos+allocated_size) 80 | } 81 | 82 | start_pos += allocated_size 83 | } 84 | 85 | //同步等待发送文件的协程 86 | for j := 0; j < coroutine; j++ { 87 | fmt.Println(<-ch) 88 | } 89 | 90 | midtime := time.Now().Unix() 91 | sendtime := midtime - starttime 92 | fmt.Printf("发送耗时: %d 秒\n", sendtime) 93 | 94 | sendMergeCommand(server_addr, merge_file_name, coroutine) //发送文件合并指令及文件名 95 | 96 | endtime := time.Now().Unix() 97 | mergetime := endtime - midtime 98 | 
fmt.Printf("合并耗时: %d 秒\n", mergetime) 99 | 100 | tot := endtime - starttime 101 | fmt.Printf("总计耗时:%d 分 %d 秒 \n", tot/60, tot%60) 102 | 103 | } 104 | 105 | func sendFile(server_addr string, c chan string, coroutineNum int, size int, fileName, mergeFileName string, start int64, end int64) { 106 | 107 | con, err := net.Dial("tcp", server_addr) 108 | if err != nil { 109 | fmt.Println("服务器连接失败!") 110 | os.Exit(-1) 111 | } 112 | defer con.Close() 113 | fmt.Println(coroutineNum, "连接已建立.文件发送中...") 114 | 115 | var by [1]byte 116 | by[0] = byte(coroutineNum) 117 | var bys []byte 118 | databuf := bytes.NewBuffer(bys) //数据缓冲变量 119 | databuf.Write(by[:]) 120 | databuf.WriteString(mergeFileName) 121 | bb := databuf.Bytes() 122 | // bb := by[:] 123 | // fmt.Println(bb) 124 | in, err := con.Write(bb) //向服务器发送当前协程的顺序,代表拆分文件的顺序, 以及待合并的文件名称 125 | if err != nil { 126 | fmt.Printf("向服务器发送数据错误: %d\n", in) 127 | os.Exit(-1) 128 | } 129 | 130 | var msg = make([]byte, 1024) //创建读取服务端信息的切片 131 | lengthh, err := con.Read(msg) //确认服务器已收到顺序数据 132 | if err != nil { 133 | fmt.Printf("读取服务器数据错误.\n", lengthh) 134 | os.Exit(-1) 135 | } 136 | str := string(msg[0:lengthh]) 137 | fmt.Println("服务端收到顺序号回应: ", str) 138 | 139 | //打开待发送文件,准备发送文件数据 140 | file, err := os.OpenFile(fileName, os.O_RDWR, 0666) 141 | if err != nil { 142 | fmt.Println(fileName, "-文件打开错误.") 143 | os.Exit(-1) 144 | } 145 | defer file.Close() 146 | 147 | file.Seek(start, 0) //设定读取文件的位置, 第二个参数0表示start是相对于文件开头的位置 148 | 149 | buf := make([]byte, size) //创建用于保存读取文件数据的切片, size就是buffsize 150 | 151 | var sendDtaTolNum int = 0 //记录发送成功的数据量(Byte) 152 | //读取并发送数据 153 | for i := start; int64(i) < end; i += int64(size) { 154 | length, err := file.Read(buf) //读取数据到切片中 155 | if err != nil { 156 | fmt.Println("读文件错误", i, coroutineNum, end) 157 | os.Exit(-1) 158 | } 159 | 160 | //判断读取的数据长度与切片的长度是否相等,如果不相等,表明文件读取已到末尾 161 | if length == size { 162 | //判断此次读取的数据是否在当前协程读取的数据范围内,如果超出,则去除多余数据,否则全部发送 163 | if int64(i)+int64(size) >= end { 164 | sendDataNum, err := con.Write(buf[:size-int((int64(i)+int64(size)-end))]) 165 | if err != nil { 166 | fmt.Printf("向服务器发送数据错误: %d\n", sendDataNum) 167 | os.Exit(0) 168 | } 169 | sendDtaTolNum += sendDataNum 170 | } else { 171 | sendDataNum, err := con.Write(buf) 172 | if err != nil { 173 | fmt.Printf("向服务器发送数据错误: %d\n", sendDataNum) 174 | os.Exit(0) 175 | } 176 | sendDtaTolNum += sendDataNum 177 | } 178 | 179 | } else { 180 | //这种情况发生在最后一个协程读取最后一个数据块的时候, 因为没有buffersize大, 所以会出现读取的长度小于buffersize 181 | //这时候把这个数据块的所有内容发给服务端即可 182 | sendDataNum, err := con.Write(buf[:length]) 183 | if err != nil { 184 | fmt.Printf("向服务器发送数据错误: %d\n", sendDataNum) 185 | os.Exit(-1) 186 | } 187 | sendDtaTolNum += sendDataNum 188 | } 189 | 190 | //读取服务器端信息,确认服务端已接收数据 191 | lengths, err := con.Read(msg) 192 | if err != nil { 193 | fmt.Printf("读取服务器数据错误.\n", lengths) 194 | os.Exit(-1) 195 | } 196 | //str := string(msg[0:lengths]) 197 | //fmt.Println("服务端收到数据块后回信息: ", str) 198 | 199 | } 200 | 201 | fmt.Println(coroutineNum, "发送数据(Byte): ", sendDtaTolNum) 202 | 203 | c <- strconv.Itoa(coroutineNum) + " 协程退出" 204 | } 205 | 206 | func sendMergeCommand(server_addr, mergeFileName string, coroutine int) { 207 | 208 | con, err := net.Dial("tcp", server_addr) 209 | if err != nil { 210 | fmt.Println("服务器连接失败!") 211 | os.Exit(-1) 212 | return 213 | } 214 | defer con.Close() 215 | fmt.Println("连接已建立. 
发送合并指令.\n文件合并中...") 216 | 217 | var by [1]byte 218 | by[0] = byte(coroutine) 219 | var bys []byte 220 | databuf := bytes.NewBuffer(bys) //数据缓冲变量 221 | databuf.WriteString("fileover") 222 | databuf.Write(by[:]) 223 | databuf.WriteString(mergeFileName) 224 | cmm := databuf.Bytes() 225 | 226 | in, err := con.Write(cmm) 227 | if err != nil { 228 | fmt.Printf("向服务器发送数据错误: %d\n", in) 229 | } 230 | 231 | var msg = make([]byte, 1024) 232 | lengthh, err := con.Read(msg) 233 | if err != nil { 234 | fmt.Printf("读取服务器数据错误.\n", lengthh) 235 | os.Exit(0) 236 | } 237 | str := string(msg[0:lengthh]) 238 | fmt.Println("传输完成(服务端信息): ", str) 239 | } 240 | -------------------------------------------------------------------------------- /golang/network/upload/server.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "net" 6 | "os" 7 | "strconv" 8 | ) 9 | 10 | func main() { 11 | 12 | var ( 13 | host = "0.0.0.0" 14 | port string 15 | ) 16 | 17 | if len(os.Args) <= 1 { 18 | port = "8888" 19 | } else { 20 | port = os.Args[1] 21 | } 22 | 23 | server_addr := host + ":" + port 24 | 25 | listen, err := net.Listen("tcp", server_addr) 26 | 27 | if err != nil { 28 | fmt.Println("服务启动发生错误: ", err) 29 | os.Exit(-1) 30 | } 31 | 32 | defer listen.Close() 33 | 34 | fmt.Printf("Server started at: %s...\n", server_addr) 35 | 36 | for { 37 | conn, err := listen.Accept() 38 | 39 | if err != nil { 40 | fmt.Println("客户端连接错误: ", err.Error()) 41 | continue 42 | } 43 | 44 | go receiveFile(conn) 45 | } 46 | } 47 | 48 | func receiveFile(conn net.Conn) { 49 | var ( 50 | end_flag string 51 | temp_file_name string //保存临时文件名称 52 | data = make([]byte, 1024*1024) //用于保存接收的数据的切片 53 | file_num int //当前协程接收的数据在原文件中的位置 54 | ) 55 | defer conn.Close() 56 | 57 | fmt.Println("收到新的连接请求: ", conn.RemoteAddr()) 58 | 59 | j := 0 //标记接收数据的次数 60 | size := 0 61 | for { 62 | length, err := conn.Read(data) 63 | 64 | if err != nil { 65 | fmt.Printf("客户端: %v已断开. 
协程号: %2d\n", conn.RemoteAddr(), file_num) 66 | return 67 | } 68 | //每个新建立的连接的第一次接收的数据是一些该数据块的信息, 包括是否结束字段, 该数据块的协程号, 该文件的名称等 69 | if j == 0 { 70 | end_flag = string(data[0:8]) 71 | if end_flag == "fileover" { 72 | xienum := int(data[8]) 73 | merge_file_name := string(data[9:length]) 74 | go mainMergeFile(xienum, merge_file_name) //合并临时文件,生成有效文件 75 | end_flag = "文件接收完成: " + merge_file_name 76 | conn.Write([]byte(end_flag)) 77 | fmt.Println(merge_file_name, "文件接收完成") 78 | return 79 | 80 | } else { //创建临时文件 81 | file_num = int(data[0]) 82 | temp_file_name = string(data[1:length]) + strconv.Itoa(file_num) 83 | fmt.Println("创建临时文件:", temp_file_name) 84 | fout, err := os.Create(temp_file_name) 85 | if err != nil { 86 | fmt.Println("创建临时文件错误: ", temp_file_name) 87 | return 88 | } 89 | fout.Close() 90 | } 91 | } else { 92 | writeTempFileEnd(temp_file_name, data[0:length]) 93 | size += length 94 | } 95 | 96 | end_flag = strconv.Itoa(file_num) + " 接收完成" 97 | conn.Write([]byte(end_flag)) 98 | j++ 99 | } 100 | fmt.Println(file_num) 101 | fmt.Println(size) 102 | 103 | } 104 | 105 | func writeTempFileEnd(filename string, data []byte) { 106 | temp_file, err := os.OpenFile(filename, os.O_APPEND|os.O_RDWR, 0666) 107 | if err != nil { 108 | fmt.Println("打开临时文件错误", err) 109 | return 110 | } 111 | defer temp_file.Close() 112 | temp_file.Write(data) 113 | } 114 | 115 | func mainMergeFile(block_number int, filename string) { 116 | 117 | file, err := os.Create(filename) 118 | if err != nil { 119 | fmt.Println("创建有效文件错误: ", err) 120 | return 121 | } 122 | defer file.Close() 123 | 124 | //依次对临时文件进行合并, 这里没有使用协程, 因为文件是有顺序的 125 | for i := 0; i < block_number; i++ { 126 | mergeFile(filename+strconv.Itoa(i), file) 127 | } 128 | 129 | //删除生成的临时文件 130 | for i := 0; i < block_number; i++ { 131 | os.Remove(filename + strconv.Itoa(i)) 132 | } 133 | 134 | } 135 | 136 | /* 137 | * 将指定临时文件合并到有效文件中 138 | * 2013-09-26 139 | * 李林 140 | * 141 | * rfilename 临时文件名称 142 | * wfile 有效文件 143 | */ 144 | func mergeFile(temp_file string, wfile *os.File) { 145 | 146 | rfile, err := os.OpenFile(temp_file, os.O_RDONLY, 0666) 147 | 148 | if err != nil { 149 | fmt.Println("合并时打开临时文件错误: ", temp_file) 150 | return 151 | } 152 | 153 | defer rfile.Close() 154 | 155 | stat, _ := rfile.Stat() 156 | 157 | num := stat.Size() 158 | 159 | buf := make([]byte, 1024*1024) 160 | 161 | for i := 0; int64(i) < num; { 162 | length, err := rfile.Read(buf) 163 | 164 | if err != nil { 165 | fmt.Println("读取文件错误") 166 | } 167 | i += length 168 | 169 | wfile.Write(buf[:length]) 170 | } 171 | 172 | } 173 | -------------------------------------------------------------------------------- /javascript/0.num_2_thousand_seprator.js: -------------------------------------------------------------------------------- 1 | "use strict" 2 | 3 | function divideBythree(intPart) { //typeof intPart string 4 | var times = intPart.length / 3; 5 | var arr = []; 6 | for (let i=0; i [[123, 456, 789], [123, 456, 789], [123]] 3 | // 4 | // 5 | function separation(arr, len) { 6 | if (arr.length <= len) { 7 | return arr; 8 | } else { 9 | var newArr = []; 10 | var quotient = parseInt(arr.length / len); //商 11 | var remainder = arr.length % len; //余数 12 | for (let i = 0; i < quotient; i++) { 13 | newArr.push(arr.slice(len*i, len*i + len)); 14 | } 15 | 16 | if (remainder !== 0) { 17 | newArr.push(arr.slice(-remainder)); 18 | } 19 | 20 | return newArr; 21 | } 22 | 23 | } 24 | 25 | var arr = [123, 456, 789, 123, 456, 789]; 26 | var len = 1; 27 | console.log(separation(arr, len)); 28 | 29 | 30 | 
-------------------------------------------------------------------------------- /javascript/enhangced_map.js: -------------------------------------------------------------------------------- 1 | //工作中经常遇到求两个相同长度的array的和,比如x = [1,2,3,4], y = [5,6,7,8] ,结果为[6,8,10,12],如果只要一个地方使用的话,用for loop就可以了 2 | //但是多个地方都是用的话,代码看上去就会很冗余,所以就为所有的array添加一个方法. 3 | 4 | Array.prototype.SumArray = function(arr) { 5 | var sum = []; 6 | if (arr != null && this.length == arr.length) { 7 | for (let i = 0; i < this.length; i++) { 8 | sum.push(this[i] + arr[i]); 9 | } 10 | } 11 | return sum; 12 | } 13 | 14 | //test 15 | var x = [1,2,3,4]; 16 | var y = [5,6,7,8]; 17 | var z = x.SumArray(y); 18 | console.log(z); 19 | 20 | -------------------------------------------------------------------------------- /javascript/learn_javascript.js: -------------------------------------------------------------------------------- 1 | //给定一个字符串,求出这个字符串中出现频率最高的那个字符以及次数; 2 | function word_max_count(word) { 3 | var obj = {}; 4 | var arr = []; 5 | 6 | for (let i=0, len=word.length; i div.col4 > div > div:nth-child(2) > select")[0].children; 21 | for(let i = 0, len = select.length; i < len; i++) { 22 | linksArray.push(select[i].attribs.value); 23 | } 24 | return linksArray; 25 | }, 26 | getDetailPage: function(url){ 27 | //If null, the body is returned as a Buffer. 28 | request({"encoding": null, "url": url, "timeout": VHao.timeout}, function(error, response, body) { 29 | if (!error && response.statusCode == 200) { 30 | //Buffer to JS string 31 | var html = iconv.decode(body, "gb2312"); 32 | var $ = cheerio.load(html); 33 | $(".list > li").each(function(index, element) { 34 | var item = element.children[1]; 35 | var movieName = VHao.getMovieName(item); //这里不能使用this,因为this指向element 36 | var movieURL = item.attribs.href; 37 | VHao.getDownloadLink(movieURL, movieName); 38 | }) 39 | } else { 40 | console.log(error); 41 | } 42 | }) 43 | }, 44 | getDownloadLink: function(url, name) { 45 | request({"encoding": null, "url": url, "timeout": VHao.timeout}, function(error, response, body) { 46 | if (!error && response.statusCode == 200) { 47 | var html = iconv.decode(body, "gb2312"); 48 | var $ = cheerio.load(html); 49 | //console.log($("#endText table tbody tr")[0].children[1].children); 50 | try { 51 | var downDomTree = $("#endText table tbody tr")[0].children[1].children; 52 | for(let index = 0; index < downDomTree.length; index++) { 53 | if(downDomTree[index].name === "a") { 54 | //console.log(downDomTree[index].attribs.href); 55 | console.log(name); 56 | //fs.appendFile(this.errorFileName, name + "\r\n", function(error) { 57 | // if (error) { 58 | // console.log(error); 59 | // } 60 | //}) 61 | break; 62 | } 63 | } 64 | 65 | } catch(e) { 66 | fs.appendFile(VHao.errorFileName, name + " ---> " + url + "\r\n", function(error) { 67 | if (error) { 68 | console.log(error); 69 | } 70 | }) 71 | } 72 | } else { 73 | console.log(error); 74 | } 75 | }) 76 | }, 77 | getMovieName: function(item) { 78 | if (item.children[0].hasOwnProperty("data")) { 79 | return item.children[0].data; 80 | } else { 81 | return VHao.getMovieName(item.children[0]); 82 | } 83 | }, 84 | start: function() { 85 | var links = this.getAllPages(); 86 | for(let i = 0; i < links.length; i++) { 87 | this.getDetailPage(links[i]); 88 | } 89 | } 90 | }; 91 | 92 | VHao.start(); 93 | //VHao.getDetailPage("http://www.6vhao.com/s/xiju/index_16.html"); 94 | //VHao.getDownloadLink("http://www.6vhao.com/dy/2017-05-21/ManHaoZhenTan.html", "xx"); 95 | 
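The trickiest part of index.js above is the encoding handling: the site serves GB2312, so the request is made with `encoding: null` to get a raw Buffer, which is decoded with iconv-lite before cheerio parses it. A stripped-down sketch of just that step, using the site's front page purely for illustration:

```
// Fetch raw bytes, then decode GB2312 -> string before parsing (illustration only).
var request = require("request");
var iconv = require("iconv-lite");
var cheerio = require("cheerio");

request({ url: "http://www.6vhao.com/", encoding: null, timeout: 5000 }, function (error, response, body) {
    if (error) {
        console.log(error);
        return;
    }
    if (response.statusCode !== 200) {
        console.log("HTTP " + response.statusCode);
        return;
    }
    var html = iconv.decode(body, "gb2312"); // body is a Buffer because encoding is null
    var $ = cheerio.load(html);
    console.log($("title").text());
});
```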
-------------------------------------------------------------------------------- /node/6vhao/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "6vhao", 3 | "version": "1.0.0", 4 | "description": "crawler the 6vhao.com", 5 | "main": "index.js", 6 | "scripts": { 7 | "test": "node index.js" 8 | }, 9 | "author": "", 10 | "license": "ISC", 11 | "dependencies": { 12 | "cheerio": "^0.22.0", 13 | "iconv-lite": "^0.4.17", 14 | "request": "^2.81.0", 15 | "sync-request": "^4.0.3" 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /node/baike/app.js: -------------------------------------------------------------------------------- 1 | "use strict" 2 | 3 | var xlsx = require('node-xlsx'); //处理Excel的模块 4 | var request = require('sync-request'); //处理HTTP请求的模块, 类似AJAX, 可以发送POST/GET请求 5 | var fs = require('fs'); //读写文件模块, 为了保存新生成的Excel文件 6 | 7 | //获取指定电影的百科地址 8 | var get_baike_url = function (name) { 9 | var url = encodeURI('http://baike.baidu.com/search/word?word=' + name); 10 | var options = { 11 | followRedirects: false, 12 | timeout: 5000, 13 | retry: true 14 | } 15 | resp = request('GET', url, options); 16 | return resp.headers.location; 17 | } 18 | 19 | 20 | var excel_file = xlsx.parse('yuhuan.xlsx'); //读取同级目录下的Excel文件 21 | 22 | //遍历影视名称, 执行搜索并填写到Excel文件中 23 | for (let i = 0, len = excel_file[0]['data'].length; i < len; i++) { 24 | 25 | console.log("正在搜索电影: " + excel_file[0]['data'][i][1]); 26 | //获取百科地址 27 | var baike_url = get_baike_url(excel_file[0]['data'][i][1]); 28 | //把获取的百科地址追加到指定的数组中 29 | excel_file[0]['data'][i][6] = baike_url; 30 | } 31 | 32 | //构建新的Excel文件 33 | var buffer = xlsx.build(excel_file); 34 | //保存新的Excel文件到本地磁盘的当前目录 35 | fs.writeFileSync('yuhuan.xlsx', buffer, {'flag':'w'}); // 如果文件存在,覆盖 36 | 37 | -------------------------------------------------------------------------------- /node/baike/readme.md: -------------------------------------------------------------------------------- 1 | 12 2 | -------------------------------------------------------------------------------- /node/baike/yuhuan.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wufeiqun/LearnByCoding/4fc2c11601922b418b864820cb29bbb98fb65d29/node/baike/yuhuan.xlsx -------------------------------------------------------------------------------- /paramiko/change_hostname.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | #coding: utf-8 3 | #修改本机的主机名称为eth0的网卡的IP地址(点转化为了下划线) 4 | import sys 5 | import socket 6 | import fcntl 7 | import struct 8 | import platform 9 | import socket 10 | import subprocess 11 | 12 | iface = "eth0" 13 | 14 | def get_ip_address(ifname): 15 | s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) 16 | return socket.inet_ntoa(fcntl.ioctl(s.fileno(), 0x8915, struct.pack('256s', ifname[:15]))[20:24]) 17 | 18 | def get_sys_version(): 19 | release = platform.release() 20 | if "el6" in release: 21 | return "el6" 22 | elif "el7" in release: 23 | return "el7" 24 | else: 25 | return "unknown" 26 | 27 | def change(): 28 | version = get_sys_version() 29 | ip = get_ip_address(iface) 30 | ip_new = ip.replace(".", "_") 31 | if version == "el6": 32 | subprocess.call(["hostname", ip_new]) 33 | subprocess.call(["cp", "/etc/sysconfig/network", "/tmp/network.bak"]) 34 | with open("/etc/sysconfig/network", "r") as f: 35 | ret = f.readlines() 36 | ret[1] = 
"HOSTNAME={0}\n".format(ip_new) 37 | with open("/etc/sysconfig/network", "w") as f: 38 | f.writelines(ret) 39 | 40 | elif version == "el7": 41 | subprocess.call(["hostnamectl", "--static", "set-hostname", ip_new]) 42 | subprocess.call(["hostnamectl", "set-hostname", ip_new]) 43 | else: 44 | print("{0} version unknown!".format(ip)) 45 | 46 | 47 | def show(): 48 | ip = get_ip_address("eth0") 49 | version = get_sys_version() 50 | ip_new = ip.replace(".", "_") 51 | print("ip={0}, version={1}, ip_new={2}, hostname={3}".format(ip, version, ip_new, socket.gethostname())) 52 | 53 | if __name__ == "__main__": 54 | change() 55 | show() 56 | -------------------------------------------------------------------------------- /paramiko/mget.py: -------------------------------------------------------------------------------- 1 | #!/home/jumpserver/python3/bin/python3 2 | #从多台远程机器上传输相同目录的文件到本地, 并且重命名,为了区分哪一个机器来的, 命名规则为文件名字前边加上IP地址 3 | # 这里使用的是密钥登陆, 所以没有指定账号密码, 可以修改 4 | # 远程的文件是通过find正则匹配到的文件列表 5 | import os 6 | import threading 7 | 8 | import paramiko 9 | 10 | 11 | class MultiSCP: 12 | def __init__(self, remote_path, local_path, filename, iplist, username, port=22): 13 | self.remote_path = remote_path 14 | self.local_path = local_path 15 | self.filename = filename 16 | self.iplist = iplist 17 | self.username = username 18 | self.port = port 19 | 20 | def get_iplist(self): 21 | with open(self.iplist) as f: 22 | iplist = f.readlines() 23 | iplist = [ip.strip() for ip in iplist] 24 | return iplist 25 | 26 | def copy_to_local(self, ip): 27 | client = paramiko.SSHClient() 28 | client.set_missing_host_key_policy(paramiko.AutoAddPolicy()) 29 | client.connect(ip, self.port, self.username) 30 | 31 | sftp = paramiko.SFTPClient.from_transport(client.get_transport()) 32 | 33 | cmd = "find {0} -type f -name {1}".format(self.remote_path, self.filename) 34 | stdin, stdout, stderr = client.exec_command(cmd) 35 | out, err = stdout.readlines(), stderr.read() 36 | if err: 37 | print("{0}遇到了点问题: {1}".format(ip, err)) 38 | else: 39 | for abs_fname in out: 40 | abs_fname = abs_fname.strip() 41 | fname = os.path.basename(abs_fname) 42 | new_fname = "{0}_{1}".format(ip, fname) 43 | nf_local_path = os.path.join(local_path, new_fname) 44 | sftp.get(abs_fname, nf_local_path) 45 | print("{0} 传输完毕!".format(new_fname)) 46 | 47 | def run(self): 48 | threads = [] 49 | iplist = self.get_iplist() 50 | for ip in iplist: 51 | print("开始处理服务器: {0}...".format(ip)) 52 | t = threading.Thread(target=self.copy_to_local, args=(ip,)) 53 | t.start() 54 | threads.append(t) 55 | for thread in threads: 56 | thread.join() 57 | 58 | if __name__ == "__main__": 59 | remote = "/home/xxx/applogs/app_stat/dpool/miaopai_*_20170915*.log.gz" 60 | remote_path = os.path.dirname(remote) 61 | local_path = "/home/xxx/bigdata/20170915" 62 | filename = os.path.basename(remote) 63 | iplist = "/home/xxx/iplist.txt" 64 | #iplist = "test.txt" 65 | username = "xxx" 66 | port = 22 67 | mscp = MultiSCP(remote_path, local_path, filename, iplist, username) 68 | mscp.run() 69 | -------------------------------------------------------------------------------- /paramiko/mscp.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | #coding: utf-8 3 | #批量复制文件到多台远程机器 4 | import os 5 | import threading 6 | 7 | import paramiko 8 | 9 | 10 | class MultiSCP: 11 | def __init__(self, remote_path, local_path, filename, iplist, username, password=None, port=22): 12 | self.remote_path = remote_path 13 | self.local_path = local_path 14 | 
self.filename = filename 15 | self.iplist = iplist 16 | self.username = username 17 | self.password = password 18 | self.port = port 19 | 20 | def get_iplist(self): 21 | with open(self.iplist) as f: 22 | iplist = f.readlines() 23 | iplist = [ip.strip() for ip in iplist] 24 | return iplist 25 | 26 | def send(self, ip): 27 | client = paramiko.SSHClient() 28 | client.set_missing_host_key_policy(paramiko.AutoAddPolicy()) 29 | if self.password: 30 | client.connect(ip, self.port, self.username, self.password, timeout=10) 31 | else: 32 | client.connect(ip, self.port, self.username, timeout=10) 33 | 34 | sftp = paramiko.SFTPClient.from_transport(client.get_transport()) 35 | 36 | local_path = os.path.join(self.local_path, self.filename) 37 | remote_path = os.path.join(self.remote_path, self.filename) 38 | sftp.put(local_path, remote_path) 39 | print("{0}: {1} 传输完毕!".format(ip, self.filename)) 40 | 41 | def run(self): 42 | threads = [] 43 | iplist = self.get_iplist() 44 | for ip in iplist: 45 | print("开始处理服务器: {0}...".format(ip)) 46 | t = threading.Thread(target=self.send, args=(ip,)) 47 | t.start() 48 | threads.append(t) 49 | for thread in threads: 50 | thread.join() 51 | 52 | if __name__ == "__main__": 53 | remote_path = "/home/rocky/tmp" 54 | local_path = "/Users/rocky/tmp" 55 | filename = "test.txt" 56 | iplist = "iplist.txt" 57 | username = "rocky" 58 | mscp = MultiSCP(remote_path, local_path, filename, iplist, username) 59 | mscp.run() 60 | -------------------------------------------------------------------------------- /python/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | lib/ 17 | lib64/ 18 | parts/ 19 | sdist/ 20 | var/ 21 | *.egg-info/ 22 | .installed.cfg 23 | *.egg 24 | 25 | # PyInstaller 26 | # Usually these files are written by a python script from a template 27 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 28 | *.manifest 29 | *.spec 30 | 31 | # Installer logs 32 | pip-log.txt 33 | pip-delete-this-directory.txt 34 | 35 | # Unit test / coverage reports 36 | htmlcov/ 37 | .tox/ 38 | .coverage 39 | .cache 40 | nosetests.xml 41 | coverage.xml 42 | 43 | # Translations 44 | *.mo 45 | *.pot 46 | 47 | # Django stuff: 48 | *.log 49 | 50 | # Sphinx documentation 51 | docs/_build/ 52 | 53 | # PyBuilder 54 | target/ 55 | -------------------------------------------------------------------------------- /python/crawler/alexaTop500/README.md: -------------------------------------------------------------------------------- 1 | # alexaTop500 2 | Crawling the top 500 websites of http://www.alexa.com/topsites/global;0, just for learning [gevent](https://github.com/gevent/gevent)~ 3 | 4 | ####Introduction 5 | 6 | --- 7 | 8 | Before the 10.1 (National Day) holidays, I had a phone interview with Alibaba, and one of the questions was about `gevent`, which I had never used at work, so I spent the whole holiday learning it. This is a small Python script to compare gevent with multiprocessing and multithreading. If you have better ideas, just open an issue, thanks.
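For readers new to gevent, here is a minimal sketch of the pattern `alexa.py` relies on — patch the socket module before anything opens a connection, then spawn one greenlet per request and join them (the URLs below are just placeholders):

```
from gevent import monkey
monkey.patch_socket()  # must run before any sockets are created

import gevent
import requests

def fetch(url):
    resp = requests.get(url, timeout=5)
    print("{0} -> {1}".format(url, resp.status_code))

urls = ["http://example.com", "http://example.org"]  # placeholder URLs
gevent.joinall([gevent.spawn(fetch, url) for url in urls])
```

Because the patched socket yields whenever a request is waiting on the network, the greenlets overlap their I/O even though everything runs in a single thread.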
9 | 10 | ####Usage 11 | 12 | --- 13 | 14 | Just clone it and run: 15 | 16 | ``` 17 | pip install requirements.txt 18 | python alexa.py 19 | ``` 20 | 21 | ####ScreenShots 22 | 23 | --- 24 | 25 | ``` 26 | 27 | ******************************common****************************** 28 | Total domains: 500 29 | Total used 10.90 seconds. 30 | ******************************multithreading****************************** 31 | Total domains: 500 32 | Total used 1.42 seconds. 33 | ******************************gevent****************************** 34 | Total domains: 500 35 | Total used 1.32 seconds. 36 | 37 | ``` 38 | -------------------------------------------------------------------------------- /python/crawler/alexaTop500/alexa.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding:utf-8 3 | import gevent 4 | from gevent import monkey 5 | monkey.patch_socket() 6 | import time 7 | import socket 8 | import traceback 9 | import threading 10 | import multiprocessing 11 | import requests 12 | from bs4 import BeautifulSoup 13 | 14 | 15 | class Site(object): 16 | """ 17 | To test the advantage of the gevent compared to multithreading, 18 | multiprocessing,I crawled the http://www.alexa.com/topsites/global 19 | and resovle the domain names contains in the page. 20 | """ 21 | def __init__(self): 22 | # "http://www.alexa.com/topsites/global;[0-19]" 23 | self.baseURL = "http://www.alexa.com/topsites/global;" 24 | self.timeout = 5 # seconds 25 | self.totalPage = 20 # 0-19 26 | self.domain = [] 27 | self.ipaddr = [] 28 | 29 | def getURList(self, page): 30 | """Get all the domains of the page""" 31 | urList = [] 32 | req = requests.get(self.baseURL+str(page)) 33 | soup = BeautifulSoup(req.text, "lxml") 34 | lst = soup.find_all(class_="site-listing") 35 | for l in lst: 36 | self.domain.append(l.find("a").string) 37 | 38 | def resolve(self, domain): 39 | """ 40 | socket.getaddrinfo(host, port[, family[, socktype[, proto[, flags]]]]) 41 | Just return the Ipv4 TCP info. 42 | """ 43 | try: 44 | ret = socket.getaddrinfo(domain, 80, 2, 1, 6) 45 | ip = ret[0][4][0] 46 | self.ipaddr.append(ip) 47 | except Exception, e: 48 | print traceback.format_exc(e) 49 | 50 | def common(self): 51 | """ 52 | usual method with for ... in xrange. 53 | """ 54 | start = time.time() 55 | for page in xrange(self.totalPage): 56 | self.getURList(page) 57 | end = time.time() 58 | print "***" * 10 + "common" + "***" * 10 59 | print "Total domains: {0}".format(len(self.domain)) 60 | print "Total used %0.2f seconds." % (end - start) 61 | 62 | def multithread(self): 63 | """ 64 | Using multithread. 65 | """ 66 | self.domain = [] # clear it before run 67 | start = time.time() 68 | threads = [] 69 | for page in xrange(self.totalPage): 70 | t = threading.Thread(target=self.getURList, args=(page,)) 71 | t.start() 72 | threads.append(t) 73 | for thread in threads: 74 | thread.join() 75 | end = time.time() 76 | print "***" * 10 + "multithreading" + "***" * 10 77 | print "Total domains: {0}".format(len(self.domain)) 78 | print "Total used %0.2f seconds." % (end - start) 79 | 80 | def multiprocess(self): 81 | """ 82 | Using multiprocess. 
83 | """ 84 | self.domain = multiprocessing.Manager().list() 85 | start = time.time() 86 | processes = [] 87 | for page in xrange(self.totalPage): 88 | p = multiprocessing.Process(target=self.getURList, args=(page,)) 89 | p.start() 90 | processes.append(p) 91 | for process in processes: 92 | process.join() 93 | end = time.time() 94 | print "***" * 10 + "multiprocessing" + "***" * 10 95 | print "Total domains: {0}".format(len(self.domain)) 96 | print "Total used %0.2f seconds." % (end - start) 97 | 98 | def gevt(self): 99 | """ 100 | Using gevent. 101 | """ 102 | self.domain = [] 103 | start = time.time() 104 | greenlets = [] 105 | for page in xrange(self.totalPage): 106 | greenlets.append(gevent.spawn(self.getURList, page)) 107 | gevent.joinall(greenlets) 108 | end = time.time() 109 | print "***" * 10 + "gevent" + "***" * 10 110 | print "Total domains: {0}".format(len(self.domain)) 111 | print "Total used %0.2f seconds." % (end - start) 112 | 113 | 114 | if __name__ == "__main__": 115 | site = Site() 116 | site.common() 117 | site.multithread() 118 | # site.multiprocess() 119 | site.gevt() 120 | -------------------------------------------------------------------------------- /python/crawler/alexaTop500/requirements.txt: -------------------------------------------------------------------------------- 1 | beautifulsoup4==4.5.1 2 | bs4==0.0.1 3 | gevent==1.1.2 4 | greenlet==0.4.10 5 | lxml==3.6.4 6 | requests==2.11.1 7 | -------------------------------------------------------------------------------- /python/crawler/pyaxel/pyaxel.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Usage: 4 | pyaxel.py 5 | pyaxel.py [--thread=] 6 | 7 | Options: 8 | -h --help Show this screen. 9 | -V --version Show version. 10 | --thread= Thread number [default: 10]. 11 | 12 | """ 13 | import os 14 | import sys 15 | import glob 16 | import time 17 | import signal 18 | import hashlib 19 | import tempfile 20 | import traceback 21 | import threading 22 | import contextlib 23 | import urllib.request 24 | 25 | from docopt import docopt 26 | 27 | 28 | class Downloader: 29 | """Light command line download accelerator""" 30 | def __init__(self, url, thread_num): 31 | signal.signal(signal.SIGINT, self.handler) 32 | self.url = self.get_url(url) 33 | self.thread_num = thread_num 34 | self.filename = self.url.split("/")[-1] 35 | # Where to save the downloaded file, default is your current dir. 36 | self.put_dir = "." 
37 | # Place to save the temporary partial files 38 | self.tmp_dir = tempfile.TemporaryDirectory() 39 | self.filesize = self.get_filesize() 40 | self.alloc = self.filesize // self.thread_num 41 | 42 | def get_url(self, url): 43 | """Get real URL when status code is 301, 302, etc""" 44 | req = urllib.request.Request(url, method="HEAD") 45 | with contextlib.closing(urllib.request.urlopen(req)) as resp: 46 | real_url = resp.geturl() 47 | if url != real_url: 48 | return self.get_url(real_url) 49 | else: 50 | return url 51 | 52 | def get_filesize(self): 53 | """Get content-length from headers (byte)""" 54 | req = urllib.request.Request(self.url, method="HEAD") 55 | with contextlib.closing(urllib.request.urlopen(req)) as resp: 56 | # Check if the server support ranges(multithreading) 57 | if resp.headers.get("Accept-Ranges") == "bytes" and resp.headers.get("Content-Length"): 58 | filesize = int(resp.headers.get("Content-Length")) 59 | return filesize 60 | else: 61 | print("The server does not support multithread, start common download...") 62 | urllib.request.urlretrieve(self.url, os.path.join(self.put_dir, self.filename)) 63 | 64 | 65 | def download(self, start, end): 66 | """Download file separately""" 67 | bs = 1024 * 8 # block size (byte) 68 | headers = {"Range": "bytes={0:d}-{1:d}".format(start, end)} 69 | # Use the thread id as the partial filename, deleted after merged. 70 | filename = threading.current_thread().name.split("-")[1] 71 | 72 | req = urllib.request.Request(self.url, headers=headers) 73 | 74 | try: 75 | with contextlib.closing(urllib.request.urlopen(req)) as resp: 76 | with open(os.path.join(self.tmp_dir.name, filename), "wb") as f: 77 | while 1: 78 | block = resp.read(bs) 79 | if not block: 80 | break 81 | f.write(block) 82 | except KeyboardInterrupt: 83 | print("Catch KeyboardInterrupt, thread {0} will exit".format(filename)) 84 | sys.exit(1) 85 | 86 | def merge(self): 87 | """Merge all the files orderly and checksum the data""" 88 | # if filename exists then append suffix with nums like "filename.0", "filename.1" etc. 89 | if os.path.exists(os.path.join(self.put_dir, self.filename)): 90 | num_files = glob.glob(os.path.join(self.put_dir, self.filename + ".*")) 91 | if num_files: 92 | max_num = max([int(num_file.split(".")[-1]) for num_file in num_files]) 93 | self.filename = self.filename + "." 
+ str(max_num + 1) 94 | else: 95 | self.filename = self.filename + ".1" 96 | 97 | merged_file = open(os.path.join(self.put_dir, self.filename), "wb") 98 | 99 | # Order partial file by thread id 100 | sorted_flist = sorted(os.listdir(self.tmp_dir.name), key=lambda x: int(x)) 101 | # Merged the partial file 102 | for partial in sorted_flist: 103 | with open(os.path.join(self.tmp_dir.name, partial), "br") as f: 104 | while 1: 105 | block = f.read(1024*8) 106 | if not block: 107 | break 108 | merged_file.write(block) 109 | merged_file.close() 110 | # Clean the tmpdir 111 | self.tmp_dir.cleanup() 112 | # Check the filesize 113 | retrive_size = os.stat(os.path.join(self.put_dir, self.filename)).st_size 114 | if retrive_size < self.filesize: 115 | raise ContentTooShortError( 116 | "retrieval incomplete: got only {0:d} out of {1:d} bytes" 117 | .format(retrive_size, self.filesize), (self.filename,) 118 | ) 119 | 120 | def handler(self, signum, frame): 121 | print("Received {0}".format(signum)) 122 | sys.exit(0) 123 | 124 | def run(self): 125 | threads = [] 126 | for n in range(self.thread_num-1): 127 | thread = threading.Thread(target=self.download, args=(n*self.alloc, n*self.alloc + self.alloc - 1)) 128 | thread.start() 129 | threads.append(thread) 130 | # Last thread download all the rest 131 | last_thread = threading.Thread(target=self.download, name="Thread-{0}".format(self.thread_num), args=((self.thread_num-1)*self.alloc, self.filesize)) 132 | last_thread.start() 133 | threads.append(last_thread) 134 | for thread in threads: 135 | thread.join() 136 | # Merge the partial file,then delete the source partial file. 137 | self.merge() 138 | 139 | 140 | def main(): 141 | start = time.time() 142 | args = docopt(__doc__, version="0.1") 143 | d = Downloader(args[""], int(args["--thread"])) 144 | d.run() 145 | end = time.time() 146 | print("Download finished with {0:.2f}s.".format(end-start)) 147 | 148 | 149 | 150 | if __name__ == "__main__": 151 | #d = Downloader("http://hellorfimg.zcool.cn/preview/425783590.jpg", 5) 152 | #print(d.get_filesize()) 153 | main() 154 | 155 | -------------------------------------------------------------------------------- /python/crawler/pyaxel/readme.md: -------------------------------------------------------------------------------- 1 | Pyaxel 2 | --- 3 | 4 | Pyaxel is a lightweight command line download accelerator for Linux and MacOS wrote with python Inspired by [axel](https://github.com/eribertomota/axel). 5 | 6 | Features 7 | --- 8 | 9 | * Support HTTP, HTTPS protocols 10 | * Support multi thread and coroutine(todo) 11 | * MD5 checksum 12 | * Show time consumption 13 | 14 | 15 | Installation 16 | --- 17 | 18 | Just support python3 right now 19 | 20 | ```python 21 | pip3 install -r requirements.txt 22 | ``` 23 | 24 | Usage 25 | --- 26 | 27 | ```python 28 | python3 pyaxel.py [URL] 29 | ``` 30 | 31 | or put `pyaxel.py` into your $PATH, and rename to axel, then 32 | 33 | ```python 34 | axel [URL] 35 | ``` 36 | 37 | more: 38 | 39 | ``` 40 | python pyaxel.py --help 41 | ``` 42 | 43 | ScreenShot 44 | --- 45 | ![basic](https://raw.githubusercontent.com/hellorocky/blog/master/picture/10.pyaxel.png) 46 | 47 | 48 | TODO 49 | --- 50 | 51 | * Complete supporting `gevent` and `asyncio` 52 | * Progress bar 53 | * Breakpoint resume 54 | * Ctrl+C signal handler 55 | 56 | 57 | Getting help 58 | --- 59 | 60 | Just open an [issue](https://github.com/hellorocky/LearnByCoding/issues) 61 | 62 | Contributions 63 | --- 64 | 65 | Thanks for some useful suggestion. 
66 | 67 | https://www.v2ex.com/t/323771 -------------------------------------------------------------------------------- /python/crawler/pyaxel/requirements.txt: -------------------------------------------------------------------------------- 1 | docopt==0.6.2 2 | requests==2.12.1 3 | -------------------------------------------------------------------------------- /python/excel/huan.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding:utf-8 3 | from gevent import monkey 4 | monkey.patch_all() 5 | 6 | import json 7 | import time 8 | import urllib 9 | import traceback 10 | 11 | import requests 12 | from gevent.pool import Pool 13 | from openpyxl import load_workbook 14 | 15 | 16 | def get_baike_url(name): 17 | """ 18 | 获取每一个明星/导演/电影/电视剧的百度百科的个人页面,从百科里面直接搜索. 19 | """ 20 | if isinstance(name, unicode): 21 | name = name.encode("utf-8") 22 | url = "http://baike.baidu.com/search/word?word={0}".format(name) 23 | resp = requests.get(url, allow_redirects=False) 24 | url = resp.headers.get("Location") if not "search/none" in resp.headers.get("Location") else None 25 | print url 26 | return url 27 | 28 | 29 | class Actor(object): 30 | def __init__(self): 31 | """初始化excel对象""" 32 | self.file_name = "huan.xlsx" 33 | self.work_book = load_workbook(filename=self.file_name) 34 | self.sheets = self.work_book.get_sheet_names() 35 | self.work_sheet = self.work_book.get_sheet_by_name(self.sheets[0]) 36 | 37 | def save(self): 38 | self.work_book.save(filename=self.file_name) 39 | 40 | def tv(self, row): 41 | # 合并LMNO列的数据并去重 42 | try: 43 | tv_name = [] 44 | for col in ["L", "M", "N", "O"]: 45 | col_name = self.work_sheet["{0}{1}".format(col, row)].value 46 | if col_name: 47 | tv_name.extend(col_name.split(",")) 48 | if not tv_name: 49 | return 50 | tv_name = list(set(tv_name)) 51 | except Exception as e: 52 | print traceback.print_exc(e) 53 | try: 54 | for col, name in zip(["AA", "AB", "AC", "AD", "AE", "AF", "AG", "AH"], tv_name): 55 | self.work_sheet["{0}{1}".format(col, row)].value = get_baike_url(name) 56 | except Exception as e: 57 | print traceback.print_exc(e) 58 | print "已完成第: {0}个.".format(row) 59 | 60 | 61 | actor =Actor() 62 | 63 | def main(): 64 | pool = Pool(10) 65 | for row in xrange(2, 4900): 66 | pool.spawn(actor.tv, (row)) 67 | print "第{0}行加入队列!".format(row) 68 | pool.join() 69 | 70 | 71 | if __name__ == "__main__": 72 | start = time.time() 73 | main() 74 | actor.save() 75 | end = time.time() 76 | print "Using {0:.2f}s".format(end-start) 77 | -------------------------------------------------------------------------------- /python/excel/huan.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wufeiqun/LearnByCoding/4fc2c11601922b418b864820cb29bbb98fb65d29/python/excel/huan.xlsx -------------------------------------------------------------------------------- /python/file/alexa.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding:utf-8 3 | import gevent 4 | from gevent import monkey 5 | monkey.patch_socket() 6 | import time 7 | import socket 8 | import traceback 9 | import threading 10 | import multiprocessing 11 | import requests 12 | from bs4 import BeautifulSoup 13 | 14 | 15 | class Site(object): 16 | """ 17 | To test the advantage of the gevent compared to multithreading, 18 | multiprocessing,I crawled the http://www.alexa.com/topsites/global 19 | and resovle the domain names contains in 
the page. 20 | """ 21 | def __init__(self): 22 | # "http://www.alexa.com/topsites/global;[0-19]" 23 | self.baseURL = "http://www.alexa.com/topsites/global;" 24 | self.timeout = 5 # seconds 25 | self.totalPage = 20 # 0-19 26 | self.domain = [] 27 | self.ipaddr = [] 28 | 29 | def getURList(self, page): 30 | """Get all the domains of the page""" 31 | urList = [] 32 | req = requests.get(self.baseURL+str(page)) 33 | soup = BeautifulSoup(req.text, "lxml") 34 | lst = soup.find_all(class_="site-listing") 35 | for l in lst: 36 | self.domain.append(l.find("a").string) 37 | 38 | def resolve(self, domain): 39 | """ 40 | socket.getaddrinfo(host, port[, family[, socktype[, proto[, flags]]]]) 41 | Just return the Ipv4 TCP info. 42 | """ 43 | try: 44 | ret = socket.getaddrinfo(domain, 80, 2, 1, 6) 45 | ip = ret[0][4][0] 46 | self.ipaddr.append(ip) 47 | except Exception, e: 48 | print traceback.format_exc(e) 49 | 50 | def common(self): 51 | """ 52 | usual method with for ... in xrange. 53 | """ 54 | start = time.time() 55 | for page in xrange(self.totalPage): 56 | self.getURList(page) 57 | end = time.time() 58 | print "***" * 10 + "common" + "***" * 10 59 | print "Total domains: {0}".format(len(self.domain)) 60 | print "Total used %0.2f seconds." % (end - start) 61 | 62 | def multithread(self): 63 | """ 64 | Using multithread. 65 | """ 66 | self.domain = [] # clear it before run 67 | start = time.time() 68 | threads = [] 69 | for page in xrange(self.totalPage): 70 | t = threading.Thread(target=self.getURList, args=(page,)) 71 | t.start() 72 | threads.append(t) 73 | for thread in threads: 74 | thread.join() 75 | end = time.time() 76 | print "***" * 10 + "multithreading" + "***" * 10 77 | print "Total domains: {0}".format(len(self.domain)) 78 | print "Total used %0.2f seconds." % (end - start) 79 | 80 | def multiprocess(self): 81 | """ 82 | Using multiprocess. 83 | """ 84 | self.domain = multiprocessing.Manager().list() 85 | start = time.time() 86 | processes = [] 87 | for page in xrange(self.totalPage): 88 | p = multiprocessing.Process(target=self.getURList, args=(page,)) 89 | p.start() 90 | processes.append(p) 91 | for process in processes: 92 | process.join() 93 | end = time.time() 94 | print "***" * 10 + "multiprocessing" + "***" * 10 95 | print "Total domains: {0}".format(len(self.domain)) 96 | print "Total used %0.2f seconds." % (end - start) 97 | 98 | def gevt(self): 99 | """ 100 | Using gevent. 101 | """ 102 | self.domain = [] 103 | start = time.time() 104 | greenlets = [] 105 | for page in xrange(self.totalPage): 106 | greenlets.append(gevent.spawn(self.getURList, page)) 107 | gevent.joinall(greenlets) 108 | end = time.time() 109 | print "***" * 10 + "gevent" + "***" * 10 110 | print "Total domains: {0}".format(len(self.domain)) 111 | print "Total used %0.2f seconds." 
% (end - start) 112 | 113 | def a(): 114 | print 121 115 | 116 | 117 | if __name__ == "__main__": 118 | site = Site() 119 | site.multithread() 120 | site.gevt() 121 | -------------------------------------------------------------------------------- /python/file/count_filesize.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding:utf-8 3 | """ 4 | 统计某一个目录下所有文件的大小,包括目录下所有层次的文件 5 | """ 6 | import os 7 | 8 | def count(path): 9 | for root, dirs, files in os.walk(path): 10 | for fname in files: 11 | f = os.path.join(root, fname) 12 | fsize = os.stat(f).st_size 13 | print "{0} --->{1} bytes".format(f, fsize) 14 | 15 | 16 | if __name__ == "__main__": 17 | path="python" 18 | count(path) 19 | 20 | -------------------------------------------------------------------------------- /python/file/dns_massive_resolve.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding:utf-8 3 | import gevent 4 | from gevent import socket 5 | from gevent.pool import Pool 6 | 7 | from alexa import Site 8 | 9 | site = Site() 10 | site.gevt() 11 | domains = site.domain 12 | 13 | pool = Pool(10) 14 | finished = 0 15 | 16 | def job(url): 17 | global finished 18 | try: 19 | try: 20 | ip = socket.gethostbyname(url) 21 | print "{0}------>{1}".format(url, ip) 22 | except socket.gaierror as ex: 23 | print "{0} failed with {1}".format(url, ex) 24 | finally: 25 | finished += 1 26 | 27 | with gevent.Timeout(2, False): 28 | for domain in domains: 29 | pool.spawn(job, domain) 30 | pool.join() 31 | 32 | print "Finished within 2 seconds:{0}/{1}".format(finished, 500) 33 | -------------------------------------------------------------------------------- /python/file/lru.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding:utf-8 3 | import collections 4 | 5 | class LRU(object): 6 | def __init__(self, capacity): 7 | self.capacity = capacity 8 | self.cache = collections.OrderedDict() 9 | 10 | def get(self, key): 11 | if not key in self.cache: 12 | return -1 13 | value = self.cache.pop(key) 14 | self.cache[key] = value 15 | 16 | def set(self, key, value): 17 | if key in self.cache: 18 | value = self.cache.pop(key) 19 | elif len(self.cache) == self.capacity: 20 | self.cache.popitem(last=False) 21 | self.cache[key] = value 22 | -------------------------------------------------------------------------------- /python/file/tmpfile.py: -------------------------------------------------------------------------------- 1 | # 很多情况下我们会去使用临时文件, 比如之前写过一个备份恢复mysql的脚本, 每一个表先备份到一个临时目录中 2 | # 之前做过一个修改远程服务器的hosts文件的, 使用的也是一个临时的目录 3 | # 这个时候我们使用python内置模块tempfile再合适不过了 4 | import tempfile 5 | # 临时文件 6 | 7 | with tempfile.TemporaryFile() as f: 8 | f.write(b"AAA") 9 | # Seek back to beginning and read the data 10 | f.seek(0) 11 | f.read() 12 | # Now the temprory file is destroyed 13 | 14 | f = tempfile.TemporaryFile() 15 | f.write(b"AAA") 16 | f.seek(0) 17 | data = f.read() 18 | print(data) 19 | f.close() 20 | # Now the temprory file is destroyed 21 | 22 | # 临时目录 23 | with tempfile.TemporaryDirectory() as dirname: 24 | print(type(dirname)) #str 25 | print(dirname) # /tmp/path 26 | 27 | # 临时目录销毁 28 | 29 | tmpdir = tempfile.TemporaryDirectory() 30 | # 使用临时目录 31 | dirname = tmpdir.name 32 | # 显式销毁, 如果不显式销毁, 程序执行完也会自动销毁的 33 | tmpdir.cleanup() 34 | 35 | 36 | -------------------------------------------------------------------------------- 
/python/network/domain-https-check.py: -------------------------------------------------------------------------------- 1 | import ssl 2 | import socket 3 | import datetime 4 | 5 | import OpenSSL 6 | 7 | #检查指定域名的HTTPS过期时间 8 | #项目依赖安装: 9 | #pip install pyopenssl 10 | 11 | 12 | def get_server_certificate(hostname, port=443): 13 | cert = ssl.get_server_certificate((hostname, port)) 14 | x509 = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, cert) 15 | expire_date_str = x509.get_notAfter().decode()[:-1] # 20181107235959Z, 去掉最后的字母Z 16 | expire_date = datetime.datetime.strptime(expire_date_str, "%Y%m%d%H%M%S") 17 | expire_date = expire_date + datetime.timedelta(hours=8) # 默认的时间是0时区的时间, 需要加8个小时 18 | print(hostname, expire_date) 19 | 20 | 21 | lines = open("domain.txt").readlines() 22 | for line in lines: 23 | try: 24 | client = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 25 | client.settimeout(1) 26 | # 这里检查目标IP地址的443端口是通的, 然后再往下执行 27 | if client.connect_ex(("{0}".format(line.strip()), 443)) == 0: 28 | get_server_certificate("{0}".format(line.strip())) 29 | except socket.gaierror: 30 | pass 31 | -------------------------------------------------------------------------------- /python/network/port_scan/basic.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # 依次探测目标主机1-65535端口的简单单线程扫描器 3 | import sys 4 | import socket 5 | 6 | class Scanner: 7 | def __init__(self, host, timeout=0.2): 8 | self.host = host 9 | self.timeout = timeout 10 | 11 | def scan(self): 12 | # connect_ex返回0表示端口开放 13 | for port in range(1, 65536): 14 | client = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 15 | client.settimeout(self.timeout) 16 | if client.connect_ex((self.host, port)) == 0: 17 | print("Port: {0} is open".format(port)) 18 | client.close() 19 | 20 | def main(host): 21 | Scanner(host).scan() 22 | 23 | if __name__ == "__main__": 24 | if len(sys.argv) == 2: 25 | main(sys.argv[1]) 26 | -------------------------------------------------------------------------------- /python/network/port_scan/ccurt.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding:utf-8 3 | import socket 4 | from queue import Queue, Empty 5 | from concurrent.futures import ThreadPoolExecutor 6 | 7 | def scan(port): 8 | client = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 9 | client.settimeout(0.5) 10 | if client.connect_ex(("118.193.81.214", port)) == 0: 11 | print("Port: {0} is open".format(port)) 12 | client.close() 13 | 14 | def worker(q): 15 | # 每个worker不断从队列取端口并扫描, 队列取空即退出 16 | while True: 17 | try: 18 | port = q.get_nowait() 19 | except Empty: 20 | break 21 | scan(port) 22 | 23 | if __name__ == "__main__": 24 | q = Queue() 25 | for port in range(1, 65536): 26 | q.put(port) 27 | with ThreadPoolExecutor(max_workers=500) as executor: 28 | for i in range(500): 29 | executor.submit(worker, q) 30 | -------------------------------------------------------------------------------- /python/network/port_scan/gevent_scanner.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | from gevent import monkey 3 | monkey.patch_all() # 不打补丁的话socket仍是阻塞的, greenlet无法真正并发 4 | import gevent 5 | from gevent import Greenlet 6 | import socket 7 | 8 | def scanner_port(host, port): 9 | print("now scanning port: {}".format(port)) 10 | sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 11 | result = sock.connect_ex((host, port)) 12 | if result == 0: 13 | print("port {}: \t Open".format(port)) 14 | 15 | def init_thread(call_func, host): 16 | thread_list = [] 17 | for i in range(1,65536): 18 | thread = Greenlet.spawn(call_func,host,i) 19 | thread_list.append(thread) 20 | return
thread_list 21 | 22 | def run_scanner(thread_list): 23 | gevent.joinall(thread_list) 24 | 25 | 26 | remoteServer = input("pls enter remote host: \t") 27 | remoteServerIP = socket.gethostbyname(remoteServer) 28 | 29 | print("-" * 60) 30 | print("pls warit , scanning remote host", remoteServerIP) 31 | print("-" * 60) 32 | 33 | threads=init_thread(scanner_port, remoteServerIP) 34 | run_scanner(threads) 35 | 36 | 37 | -------------------------------------------------------------------------------- /python/network/port_scan/gevt_pool.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from gevent import monkey 3 | monkey.patch_all() 4 | import socket 5 | from gevent.pool import Pool 6 | 7 | class Scanner: 8 | def __init__(self, host, timeout=0.2): 9 | self.host = host 10 | self.timeout = timeout 11 | 12 | def scan(self, port): 13 | client = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 14 | client.settimeout(self.timeout) 15 | if client.connect_ex((self.host, port)) == 0: 16 | print("Port: {0} is open".format(port)) 17 | client.close() 18 | 19 | if __name__ == "__main__": 20 | pool = Pool(20) 21 | s = Scanner("123.249.94.160") 22 | pool.map(s.scan, range(1, 65536)) 23 | pool.join() 24 | -------------------------------------------------------------------------------- /python/network/port_scan/gevt_spawn.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding:utf-8 3 | from gevent import monkey 4 | monkey.patch_all() 5 | import gevent 6 | import socket 7 | 8 | def scan(port): 9 | client = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 10 | client.settimeout(0.5) 11 | if client.connect_ex(("118.193.81.214", port)) == 0: 12 | print "Port: {0} is open".format(port) 13 | client.close() 14 | 15 | if __name__ == "__main__": 16 | greenlets = [gevent.spawn(scan, i) for i in xrange(1, 65535)] 17 | gevent.joinall(greenlets) 18 | -------------------------------------------------------------------------------- /python/network/port_scan/multiprocess.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding:utf-8 3 | import socket 4 | from multiprocessing import Pool 5 | 6 | def scan(port): 7 | client = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 8 | client.settimeout(0.5) 9 | if client.connect_ex(("118.193.81.214", port)) == 0: 10 | print "Port: {0} is open".format(port) 11 | client.close() 12 | 13 | if __name__ == "__main__": 14 | pool = Pool(10) 15 | pool.map(scan, xrange(1, 65535)) 16 | pool.close() 17 | pool.join() 18 | -------------------------------------------------------------------------------- /python/network/port_scan/multithread.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import sys 3 | import socket 4 | from multiprocessing.dummy import Pool as ThreadPool 5 | 6 | class Scanner: 7 | def __init__(self, host): 8 | self.host = host 9 | self.timeout = 0.5 10 | self.pool = ThreadPool(100) 11 | 12 | def scan(self, port): 13 | client = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 14 | client.settimeout(self.timeout) 15 | if client.connect_ex((self.host, port)) == 0: 16 | print("Port: {0} is open".format(port)) 17 | 18 | def start(self): 19 | self.pool.map(self.scan, range(1, 10000)) 20 | self.pool.close() 21 | self.pool.join() 22 | 23 | if __name__ == "__main__": 24 | if len(sys.argv) == 2: 25 | s = Scanner(sys.argv[1]) 26 
| s.start() 27 | -------------------------------------------------------------------------------- /python/network/socket/asyncio_server.py: -------------------------------------------------------------------------------- 1 | import os 2 | import asyncio 3 | import argparse 4 | # 参考https://docs.python.org/3/library/asyncio-protocol.html#asyncio.Protocol.eof_received 5 | 6 | 7 | class EchoServerClientProtocol(asyncio.Protocol): 8 | def connection_made(self, transport): 9 | self.client_address = transport.get_extra_info("peername") 10 | print("收到{0}:{1}的连接!".format(*self.client_address)) 11 | self.transport = transport 12 | 13 | def data_received(self, data): 14 | message = data.decode(encoding="utf-8", errors="ignore") 15 | print("接收到来自客户端: {1}:{2}的数据: {0}".format(message, *self.client_address)) 16 | self.transport.write("谢谢你!\n".encode()) 17 | 18 | def eof_received(self): 19 | print("eof received!") 20 | 21 | def connection_lost(self, exc): 22 | print("客户端 {0}:{1}断开连接!".format(*self.client_address)) 23 | 24 | if __name__ == "__main__": 25 | parser = argparse.ArgumentParser(description="简单的TCP回显服务器!") 26 | parser.add_argument("--hostname", dest="hostname", default="0.0.0.0", metavar="IP", help="请输入监听的IP地址") 27 | parser.add_argument("--port", dest="port", type=int, default=8888, metavar="端口", help="请输入监听的端口") 28 | args = parser.parse_args() 29 | 30 | loop = asyncio.get_event_loop() 31 | # 每一个新的请求实例化一个类 32 | coro = loop.create_server(EchoServerClientProtocol, args.hostname, args.port) 33 | server = loop.run_until_complete(coro) 34 | 35 | print("Server start at: {1}:{2} PID:{0}".format(os.getpid(), *server.sockets[0].getsockname())) 36 | try: 37 | loop.run_forever() 38 | except KeyboardInterrupt: 39 | server.close() 40 | loop.run_until_complete(server.wait_closed()) 41 | loop.close() 42 | -------------------------------------------------------------------------------- /python/network/socket/basic_server.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | 简单的TCPServer 4 | """ 5 | import os 6 | import socket 7 | import argparse 8 | 9 | 10 | class TCPServer: 11 | def __init__(self, host, port): 12 | self.pid = os.getpid() 13 | self.address = (host, port) 14 | 15 | def handler(self, connection, client_address): 16 | while True: 17 | data = connection.recv(2048) 18 | if not data: 19 | break 20 | print("接收到来自客户端: {1}:{2}的数据: {0}".format(data.decode(encoding="utf-8", errors="ignore"), *client_address)) 21 | connection.send("谢谢你!\n".encode()) 22 | print("客户端: {0}:{1} 断开连接!".format(*client_address)) 23 | 24 | def run(self): 25 | server = socket.socket(socket.AF_INET,socket.SOCK_STREAM) 26 | server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) 27 | server.bind(self.address) 28 | server.listen(5) 29 | print("Started server at {0}:{1}, PID: {2}".format(*self.address, self.pid)) 30 | while True: 31 | connection, client_address = server.accept() 32 | print("收到{0}:{1}的连接!".format(*client_address)) 33 | self.handler(connection, client_address) 34 | 35 | 36 | 37 | if __name__ == "__main__": 38 | parser = argparse.ArgumentParser(description="简单的TCP回显服务器!") 39 | parser.add_argument("--hostname", dest="hostname", default="0.0.0.0", metavar="IP", help="请输入监听的IP地址") 40 | parser.add_argument("--port", dest="port", type=int, default=8888, metavar="端口", help="请输入监听的端口") 41 | args = parser.parse_args() 42 | server = TCPServer(args.hostname, args.port) 43 | server.run() 44 | 
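
All of the echo servers in this directory speak the same trivial protocol (send some bytes, read back the "谢谢你!" reply), so before reaching for the threaded client below they can be smoke-tested with a throwaway one-shot client. A minimal sketch — this helper is not part of the repo and assumes a server is already listening on 127.0.0.1:8888:

```python
# one-shot smoke-test client (hypothetical helper, not in this repo)
# assumes one of the echo servers above is listening on 127.0.0.1:8888
import socket

with socket.create_connection(("127.0.0.1", 8888), timeout=5) as conn:
    conn.sendall("你好, 服务器!".encode())
    reply = conn.recv(1024)
    print(reply.decode(encoding="utf-8", errors="ignore"))
```
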
-------------------------------------------------------------------------------- /python/network/socket/client.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import sys 3 | import time 4 | import socket 5 | import signal 6 | import argparse 7 | import threading 8 | 9 | 10 | class Client: 11 | def __init__(self, host, port, thread_num, interval): 12 | self.address = (host, port) 13 | self.thread_num = thread_num 14 | self.interval = interval 15 | self.threads = [] 16 | 17 | def client(self, tid): 18 | sock = "client{0}".format(tid) 19 | locals()[sock] = socket.socket(socket.AF_INET,socket.SOCK_STREAM) 20 | locals()[sock].connect(self.address) 21 | locals()[sock].settimeout(10) 22 | 23 | while True: 24 | data = "你好, 服务器! 我是: {0}, 现在时间是: {1}".format(sock, time.strftime("%H:%M:%S", time.localtime())) 25 | locals()[sock].sendall(data.encode()) 26 | print("发送: {0}".format(data)) 27 | recv_data = locals()[sock].recv(1024) 28 | print("接收: {0}".format(recv_data.decode(encoding="utf-8", errors="ignore"))) 29 | time.sleep(self.interval) 30 | 31 | def run(self): 32 | for tid in range(self.thread_num): 33 | thread = threading.Thread(target=self.client, args=(tid,)) 34 | thread.start() 35 | print("{0} has started...".format(thread.name)) 36 | self.threads.append(thread) 37 | 38 | for thread in self.threads: 39 | thread.join() 40 | 41 | # register Ctrl+C signal 42 | #def handler(sig, frame): 43 | 44 | 45 | if __name__ == "__main__": 46 | parser = argparse.ArgumentParser(description="简单的TCP客户端!") 47 | parser.add_argument("--hostname", dest="hostname", default="127.0.0.1", metavar="IP", help="请输入监听的IP地址") 48 | parser.add_argument("--port", dest="port", type=int, default=8888, metavar="端口", help="请输入监听的端口") 49 | parser.add_argument("--thread", dest="thread", type=int, default=1, metavar="线程数", help="请输入要启动的客户端线程数") 50 | parser.add_argument("--interval", dest="interval", type=int, default=2, metavar="间隔时间(秒)", help="请输入间隔时长") 51 | args = parser.parse_args() 52 | client = Client(args.hostname, args.port, args.thread, args.interval) 53 | client.run() 54 | -------------------------------------------------------------------------------- /python/network/socket/epoll_server.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | #优点: 1. 
相比较于poll, 不随着监控的fd数量的增长而变慢, 基于事件 3 | import os 4 | import queue 5 | import select 6 | import socket 7 | import argparse 8 | 9 | 10 | class TCPServer: 11 | def __init__(self, host, port, timeout): 12 | self.pid = os.getpid() 13 | self.server_address = (host, port) 14 | self.server = self.create_server() 15 | self.timeout = timeout # 单位(秒) 16 | self.message_queues = {} 17 | self.READ_ONLY = (select.EPOLLIN | select.EPOLLPRI | select.EPOLLHUP | select.EPOLLERR) 18 | self.READ_WRITE = (self.READ_ONLY | select.EPOLLOUT) 19 | self.epoller = select.epoll() 20 | self.epoller.register(self.server.fileno(), self.READ_ONLY) 21 | # 由于poll返回的是一个数组, 数组中的元素为元组, 元组格式为(fd, flag), 所以有必要定义一个fd_to_socket 22 | self.fd_to_socket = {self.server.fileno(): self.server} 23 | # 由于客户端socket关闭以后要删除fd_to_socket中保存的socket, 但是socket是value, 为了更加快速删除该条记录 24 | # 又定义了一个socket_to_fd字典 25 | self.socket_to_fd = {self.server: self.server.fileno()} 26 | 27 | def create_server(self): 28 | server = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 29 | server.setblocking(False) 30 | server.bind(self.server_address) 31 | server.listen(5) 32 | return server 33 | 34 | def handle_receive(self, sock): 35 | if sock is self.server: # 说明是有新的客户端请求进来 36 | connection, client_address = sock.accept() 37 | print("收到{0}:{1}的连接!".format(*client_address)) 38 | connection.setblocking(False) 39 | self.fd_to_socket[connection.fileno()] = connection 40 | self.socket_to_fd[connection] = connection.fileno() 41 | # 将新来的客户端socket注册到监听列表中 42 | self.epoller.register(connection.fileno(), self.READ_ONLY) 43 | # 这里使用了队列, 保存要返回的数据, 如果只是学习poll使用的话可以去掉, 让每个socket返回相同数据即可 44 | self.message_queues[connection] = queue.Queue() 45 | else: # 说明已经建立连接的socket有数传送过来据等待接收 46 | data = sock.recv(1024) # 这里不能使用while阻塞地获取完数据, 因为是非阻塞式的socket, 通过poll的不停遍历实现数据的全部获取 47 | if data: 48 | print("接收到来自客户端: {1}:{2}的数据: {0}".format(data.decode(encoding="utf-8", errors="ignore"), 49 | *sock.getpeername())) 50 | self.message_queues[sock].put("谢谢你\n".encode()) 51 | # 修改该socket监控状态为读写 52 | self.epoller.modify(sock, self.READ_WRITE) 53 | else: # 客户端断开连接 54 | print("客户端: {0}:{1} 断开连接!".format(*sock.getpeername())) 55 | # 取消该socket的注册信息 56 | self.epoller.unregister(sock) 57 | # 删除该socket的消息队列 58 | del self.message_queues[sock] 59 | # 从fd字段中删除该socket 60 | del self.fd_to_socket[self.socket_to_fd[sock]] 61 | del self.socket_to_fd[sock] 62 | # 关闭该socket 63 | sock.close() 64 | 65 | def handle_send(self, sock): 66 | try: 67 | next_msg = self.message_queues[sock].get_nowait() # 从消息队列里获取数据(前面填充到该队列里的数据) 68 | except queue.Empty: # 该socket的消息队列为空的时候表示已经读取并回复了, 这时候应该只监听该socket是否可读即可 69 | self.epoller.modify(sock, self.READ_ONLY) 70 | else: 71 | sock.send(next_msg) 72 | 73 | def handle_exception(self, sock): 74 | print("连接: {0}:{1} 发生异常!".format(*sock.getpeername())) 75 | # 取消该socket的注册信息 76 | self.epoller.unregister(sock) 77 | # 删除该socket的消息队列 78 | del self.message_queues[sock] 79 | # 从fd字段中删除该socket 80 | del self.fd_to_socket[self.socket_to_fd[sock]] 81 | del self.socket_to_fd[sock] 82 | sock.close() 83 | 84 | def run(self): 85 | print("Started server at {0}:{1}, PID: {2}".format(*self.server_address, self.pid)) 86 | while True: 87 | events = self.epoller.poll(self.timeout) 88 | if not events: 89 | print("继续事件监听中...") 90 | continue 91 | for fd, event in events: 92 | sock = self.fd_to_socket[fd] 93 | if event == select.EPOLLIN: 94 | self.handle_receive(sock) 95 | elif event == select.EPOLLOUT: # 说明该socket是可写状态, 一般只要没有在可读状态的时候一个socket都是出于可写状态的 96 | self.handle_send(sock) 97 | elif event in (select.EPOLLERR, 
select.EPOLLHUP): 98 | self.handle_exception(sock) 99 | 100 | 101 | if __name__ == "__main__": 102 | parser = argparse.ArgumentParser(description="简单的TCPServer!") 103 | parser.add_argument("--hostname", dest="hostname", default="0.0.0.0", metavar="IP", help="请输入监听的IP地址") 104 | parser.add_argument("--port", dest="port", type=int, default=8888, metavar="端口", help="请输入监听的端口") 105 | parser.add_argument("--timeout", dest="timeout", type=int, default=5, metavar="超时时间(秒)", help="请输入超时时间") 106 | args = parser.parse_args() 107 | server = TCPServer(args.hostname, args.port, args.timeout) 108 | server.run() 109 | -------------------------------------------------------------------------------- /python/network/socket/gevent_server.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | 简单的TCPServer 4 | """ 5 | import os 6 | import socket 7 | import argparse 8 | 9 | from gevent.server import StreamServer 10 | 11 | 12 | class TCPServer: 13 | def __init__(self, host, port): 14 | self.pid = os.getpid() 15 | self.address = (host, port) 16 | 17 | def handler(self, connection, client_address): 18 | while True: 19 | data = connection.recv(2048) 20 | if not data: 21 | break 22 | print("接收到来自客户端: {1}:{2}的数据: {0}".format(data.decode(encoding="utf-8", errors="ignore"), *client_address)) 23 | connection.send("谢谢你!\n".encode()) 24 | print("客户端: {0}:{1} 断开连接!".format(*client_address)) 25 | 26 | def run(self): 27 | print("Started server at {0}:{1}, PID: {2}".format(*self.address, self.pid)) 28 | server = StreamServer(self.address, self.handler) 29 | server.serve_forever() 30 | 31 | 32 | if __name__ == "__main__": 33 | parser = argparse.ArgumentParser(description="简单的TCP回显服务器!") 34 | parser.add_argument("--hostname", dest="hostname", default="0.0.0.0", metavar="IP", help="请输入监听的IP地址") 35 | parser.add_argument("--port", dest="port", type=int, default=8888, metavar="端口", help="请输入监听的端口") 36 | args = parser.parse_args() 37 | server = TCPServer(args.hostname, args.port) 38 | server.run() 39 | -------------------------------------------------------------------------------- /python/network/socket/multiprocessing_server.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | 多进程版本的TCPServer 4 | """ 5 | import os 6 | import socket 7 | import argparse 8 | import traceback 9 | import multiprocessing 10 | 11 | 12 | class EchoServer: 13 | def __init__(self, host, port): 14 | self.process_num = 0 15 | self.address = (host, port) 16 | self.pid = os.getpid() 17 | 18 | def handler(self, conn, addr): 19 | while True: 20 | data = conn.recv(2048) 21 | if not data: 22 | break 23 | print("接收到来自客户端: {1}:{2}的数据: {0}".format(data.decode(encoding="utf-8", errors="ignore"), *addr)) 24 | conn.sendall("谢谢你!\n".encode()) 25 | print("Closed from {0}:{1}".format(*addr)) 26 | 27 | def run(self): 28 | server = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 29 | server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) 30 | server.bind(self.address) 31 | server.listen(5) 32 | 33 | print("Started server at {0}:{1} with pid: {2}...".format(*self.address, self.pid)) 34 | 35 | while True: 36 | connection, client_address = server.accept() 37 | self.process_num += 1 38 | process = multiprocessing.Process(target=self.handler, args=(connection, client_address,)) 39 | process.daemon = True 40 | process.start() 41 | print("Server process {0} has started to process {1}:{2}".format(self.process_num, *client_address)) 42 | 43 | 
44 | if __name__ == "__main__": 45 | parser = argparse.ArgumentParser(description="简单的TCPServer!") 46 | parser.add_argument("--hostname", dest="hostname", default="0.0.0.0", metavar="IP", help="请输入监听的IP地址") 47 | parser.add_argument("--port", dest="port", type=int, default=8888, metavar="端口", help="请输入监听的端口") 48 | args = parser.parse_args() 49 | server = EchoServer(args.hostname, args.port) 50 | try: 51 | server.run() 52 | except: 53 | traceback.print_exc() 54 | finally: 55 | for process in multiprocessing.active_children(): 56 | print("关闭子进程: {0}".format(process.name)) 57 | process.terminate() 58 | process.join() 59 | -------------------------------------------------------------------------------- /python/network/socket/poll_server.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | #优点: 1. 相比较于select, 监控的fd没有数量限制 3 | #缺点: 1. 只支持Linux, MacOS和Windows不支持(Mac上测试有bug) 4 | import os 5 | import queue 6 | import select 7 | import socket 8 | import argparse 9 | 10 | 11 | class TCPServer: 12 | def __init__(self, host, port, timeout): 13 | self.pid = os.getpid() 14 | self.server_address = (host, port) 15 | self.server = self.create_server() 16 | self.timeout = timeout # 单位(毫秒) 17 | self.message_queues = {} 18 | self.READ_ONLY = (select.POLLIN | select.POLLPRI | select.POLLHUP | select.POLLERR) 19 | self.READ_WRITE = (self.READ_ONLY | select.POLLOUT) 20 | self.poller = select.poll() 21 | self.poller.register(self.server.fileno(), self.READ_ONLY) 22 | # 由于poll返回的是一个数组, 数组中的元素为元组, 元组格式为(fd, flag), 所以有必要定义一个fd_to_socket 23 | self.fd_to_socket = {self.server.fileno(): self.server} 24 | # 由于客户端socket关闭以后要删除fd_to_socket中保存的socket, 但是socket是value, 为了更加快速删除该条记录 25 | # 又定义了一个socket_to_fd字典 26 | self.socket_to_fd = {self.server: self.server.fileno()} 27 | 28 | def create_server(self): 29 | server = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 30 | server.setblocking(False) 31 | server.bind(self.server_address) 32 | server.listen(5) 33 | return server 34 | 35 | def handle_receive(self, sock): 36 | if sock is self.server: # 说明是有新的客户端请求进来 37 | connection, client_address = sock.accept() 38 | print("收到{0}:{1}的连接!".format(*client_address)) 39 | connection.setblocking(False) 40 | self.fd_to_socket[connection.fileno()] = connection 41 | self.socket_to_fd[connection] = connection.fileno() 42 | # 将新来的客户端socket注册到监听列表中 43 | self.poller.register(connection, self.READ_ONLY) 44 | # 这里使用了队列, 保存要返回的数据, 如果只是学习poll使用的话可以去掉, 让每个socket返回相同数据即可 45 | self.message_queues[connection] = queue.Queue() 46 | else: # 说明已经建立连接的socket有数传送过来据等待接收 47 | data = sock.recv(1024) # 这里不能使用while阻塞地获取完数据, 因为是非阻塞式的socket, 通过poll的不停遍历实现数据的全部获取 48 | if data: 49 | print("接收到来自客户端: {1}:{2}的数据: {0}".format(data.decode(encoding="utf-8", errors="ignore"), 50 | *sock.getpeername())) 51 | self.message_queues[sock].put("谢谢你\n".encode()) 52 | # 修改该socket监控状态为读写 53 | self.poller.modify(sock, self.READ_WRITE) 54 | else: # 客户端断开连接 55 | print("客户端: {0}:{1} 断开连接!".format(*sock.getpeername())) 56 | # 取消该socket的注册信息 57 | self.poller.unregister(sock) 58 | # 删除该socket的消息队列 59 | del self.message_queues[sock] 60 | # 从fd字段中删除该socket 61 | del self.fd_to_socket[self.socket_to_fd[sock]] 62 | del self.socket_to_fd[sock] 63 | # 关闭该socket 64 | sock.close() 65 | 66 | def handle_send(self, sock): 67 | try: 68 | next_msg = self.message_queues[sock].get_nowait() # 从消息队列里获取数据(前面填充到该队列里的数据) 69 | except queue.Empty: # 该socket的消息队列为空的时候表示已经读取并回复了, 这时候应该只监听该socket是否可读即可 70 | self.poller.modify(sock, self.READ_ONLY) 71 | 
else: 72 | sock.send(next_msg) 73 | 74 | def handle_exception(self, sock): 75 | print("连接: {0}:{1} 发生异常!".format(*sock.getpeername())) 76 | # 取消该socket的注册信息 77 | self.poller.unregister(sock) 78 | # 删除该socket的消息队列 79 | del self.message_queues[sock] 80 | # 从fd字段中删除该socket 81 | del self.fd_to_socket[self.socket_to_fd[sock]] 82 | del self.socket_to_fd[sock] 83 | sock.close() 84 | 85 | def run(self): 86 | print("Started server at {0}:{1}, PID: {2}".format(*self.server_address, self.pid)) 87 | while True: 88 | events = self.poller.poll(self.timeout) 89 | if not events: 90 | print("继续事件监听中...") 91 | continue 92 | for fd, event in events: 93 | sock = self.fd_to_socket[fd] 94 | if event in (select.POLLIN, select.POLLPRI): 95 | self.handle_receive(sock) 96 | elif event == select.POLLOUT: # 说明该socket是可写状态, 一般只要没有在可读状态的时候一个socket都是出于可写状态的 97 | self.handle_send(sock) 98 | elif event in (select.POLLERR, select.POLLHUP): 99 | self.handle_exception(sock) 100 | 101 | 102 | if __name__ == "__main__": 103 | parser = argparse.ArgumentParser(description="简单的TCP回显服务器!") 104 | parser.add_argument("--hostname", dest="hostname", default="0.0.0.0", metavar="IP", help="请输入监听的IP地址") 105 | parser.add_argument("--port", dest="port", type=int, default=8888, metavar="端口", help="请输入监听的端口") 106 | parser.add_argument("--timeout", dest="timeout", type=int, default=5000, metavar="超时时间(毫秒)", help="请输入超时时间") 107 | args = parser.parse_args() 108 | server = TCPServer(args.hostname, args.port, args.timeout) 109 | server.run() 110 | -------------------------------------------------------------------------------- /python/network/socket/select_server.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | #select是IO多路复用的一种技术, 优点是跨平台性好, 缺点是单个进程监控的fd数量有上限FD_SIZE, 一般是1024 3 | import os 4 | import queue 5 | import socket 6 | import select 7 | import argparse 8 | 9 | 10 | class TCPServer: 11 | def __init__(self, host, port, timeout): 12 | self.pid = os.getpid() 13 | self.server_address = (host, port) 14 | self.server = self.create_server() 15 | self.inputs = [self.server] 16 | self.outputs = [] 17 | self.timeout = timeout # 单位(秒) 18 | self.message_queues = {} 19 | 20 | def create_server(self): 21 | server = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 22 | server.setblocking(False) 23 | server.bind(self.server_address) 24 | server.listen(5) 25 | return server 26 | 27 | def handle_receive(self, readable): 28 | for sock in readable: 29 | if sock is self.server: 30 | connection, client_address = sock.accept() 31 | print("收到{0}:{1}的连接!".format(*client_address)) 32 | connection.setblocking(False) 33 | self.inputs.append(connection) 34 | self.message_queues[connection] = queue.Queue() 35 | else: 36 | data = sock.recv(1024) 37 | if data: 38 | print("接收到来自客户端: {1}:{2}的数据: {0}".format(data.decode(encoding="utf-8", errors="ignore"), 39 | *sock.getpeername())) 40 | self.message_queues[sock].put("谢谢你".encode()) 41 | if sock not in self.outputs: 42 | self.outputs.append(sock) 43 | else: 44 | print("客户端: {0}:{1} 断开连接!".format(*sock.getpeername())) 45 | if sock in self.outputs: 46 | self.outputs.remove(sock) 47 | self.inputs.remove(sock) 48 | sock.close() 49 | del self.message_queues[sock] 50 | 51 | def handle_send(self, writable): 52 | for sock in writable: 53 | try: 54 | next_msg = self.message_queues[sock].get_nowait() 55 | except queue.Empty: 56 | self.outputs.remove(sock) 57 | else: # 当try语句执行的时候才执行else 58 | sock.send(next_msg) 59 | 60 | def handle_exception(self, exceptional): 61 | 
for sock in exceptional: 62 | print("Socket: {0}:{1} 发生异常!".format(*sock.getpeername())) 63 | self.inputs.remove(sock) 64 | if sock in self.outputs: 65 | self.outputs.remove(sock) 66 | sock.close() 67 | del self.message_queues[sock] 68 | 69 | def run(self): 70 | print("Started server at {0}:{1}, PID: {2}".format(*self.server_address, self.pid)) 71 | while self.inputs: 72 | try: 73 | readable, writable, exceptional = select.select(self.inputs, self.outputs, self.inputs, self.timeout) 74 | if not (readable or writable or exceptional): 75 | print('在指定时间内未发现活跃的socket,这里可以做一些其它的事情, 然后继续监控!') 76 | continue 77 | if readable: 78 | self.handle_receive(readable) 79 | if writable: 80 | self.handle_send(writable) 81 | if exceptional: 82 | self.handle_exception(exceptional) 83 | except select.error: 84 | print("select error") 85 | 86 | 87 | 88 | if __name__ == "__main__": 89 | parser = argparse.ArgumentParser(description="简单的TCPServer!") 90 | parser.add_argument("--hostname", dest="hostname", default="0.0.0.0", metavar="IP", help="请输入监听的IP地址") 91 | parser.add_argument("--port", dest="port", type=int, default=8888, metavar="端口", help="请输入监听的端口") 92 | parser.add_argument("--timeout", dest="timeout", type=int, default=10, metavar="超时时间(秒)", help="请输入超时时间") 93 | args = parser.parse_args() 94 | server = TCPServer(args.hostname, args.port, args.timeout) 95 | server.run() 96 | -------------------------------------------------------------------------------- /python/network/socket/selectors_server.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os 3 | import socket 4 | import argparse 5 | import selectors 6 | 7 | 8 | class TCPServer(object): 9 | """Simple TCPServer with selectors""" 10 | def __init__(self, host, port): 11 | self.pid = os.getpid() 12 | self.address = (host, port) 13 | self.sel = selectors.DefaultSelector() 14 | 15 | def accept(self, server_socket, mask): 16 | connection, client_address = server_socket.accept() 17 | print("收到{0}:{1}的连接!".format(*client_address)) 18 | connection.setblocking(False) 19 | self.sel.register(connection, selectors.EVENT_READ, self.handle) 20 | 21 | def handle(self, connection, mask): 22 | data = connection.recv(1024) 23 | if data: 24 | print("接收到来自客户端: {1}:{2}的数据: {0}".format(data.decode(encoding="utf-8", errors="ignore"), *connection.getpeername())) 25 | connection.send("谢谢你!\n".encode()) 26 | else: 27 | print("客户端: {0}:{1} 断开连接!".format(*connection.getpeername())) 28 | self.sel.unregister(connection) 29 | connection.close() 30 | 31 | def run(self): 32 | server = socket.socket(socket.AF_INET,socket.SOCK_STREAM) 33 | server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) 34 | server.bind(self.address) 35 | server.listen(100) 36 | self.sel.register(server, selectors.EVENT_READ, self.accept) 37 | print("Started server at {1}:{2}, PID: {0}".format(self.pid, *self.address)) 38 | 39 | while True: 40 | events = self.sel.select(timeout=100) 41 | # key is a SelectorsKey instance including info about the client/server connection 42 | for key, mask in events: 43 | callback = key.data 44 | sock = key.fileobj 45 | callback(sock, mask) 46 | self.sel.close() 47 | 48 | 49 | if __name__ == "__main__": 50 | parser = argparse.ArgumentParser(description="简单的TCP回显服务器!") 51 | parser.add_argument("--hostname", dest="hostname", default="0.0.0.0", metavar="IP", help="请输入监听的IP地址") 52 | parser.add_argument("--port", dest="port", type=int, default=8888, metavar="端口", help="请输入监听的端口") 53 | args = parser.parse_args() 
54 | server = TCPServer(args.hostname, args.port) 55 | server.run() 56 | 57 | 58 | -------------------------------------------------------------------------------- /python/network/socket/socketserver_server.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | 简单的TCPServer 4 | """ 5 | import os 6 | import argparse 7 | import socketserver 8 | 9 | 10 | class MyTCPHandler(socketserver.BaseRequestHandler): 11 | 12 | def handle(self): 13 | while True: 14 | data = self.request.recv(1024) 15 | if not data: 16 | break 17 | print("接收到来自客户端: {1}:{2}的数据: {0}".format(data.decode(encoding="utf-8", errors="ignore"), *self.client_address)) 18 | self.request.send("谢谢你!\n".encode()) 19 | 20 | def finish(self): 21 | print("客户端: {0}:{1} 断开连接!".format(*self.client_address)) 22 | 23 | 24 | if __name__ == "__main__": 25 | parser = argparse.ArgumentParser(description="简单的TCP回显服务器!") 26 | parser.add_argument("--hostname", dest="hostname", default="0.0.0.0", metavar="IP", help="请输入监听的IP地址") 27 | parser.add_argument("--port", dest="port", type=int, default=8888, metavar="端口", help="请输入监听的端口") 28 | args = parser.parse_args() 29 | with socketserver.TCPServer((args.hostname, args.port), MyTCPHandler) as server: 30 | print("Started server at {0}:{1}, PID: {2}".format(args.hostname, args.port, os.getpid())) 31 | server.serve_forever() 32 | -------------------------------------------------------------------------------- /python/network/socket/threading_server.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | 多线程版本的TCPServer 4 | """ 5 | import socket 6 | import argparse 7 | import threading 8 | 9 | 10 | class EchoServer: 11 | def __init__(self, host, port): 12 | self.thread_num = 0 13 | self.address = (host, port) 14 | self.pid = os.getpid() 15 | 16 | def handler(self, connection, client_address): 17 | while True: 18 | data = connection.recv(2048) 19 | if not data: 20 | break 21 | print("接收到来自客户端: {1}:{2}的数据: {0}".format(data.decode(encoding="utf-8", errors="ignore"), *client_address)) 22 | connection.sendall("谢谢你!\n".encode()) 23 | print("客户端: {0}:{1} 断开连接!".format(*client_address)) 24 | 25 | def run(self): 26 | server = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 27 | server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) 28 | server.bind(self.address) 29 | server.listen(5) 30 | 31 | print("Started server at {0}:{1}, PID: {2}".format(*self.address, self.pid)) 32 | 33 | while True: 34 | connection, client_address = server.accept() 35 | self.thread_num += 1 36 | thread = threading.Thread(target=self.handler, args=(connection, client_address,)) 37 | thread.start() 38 | print("Server thread {0} has started to process {1}:{2}".format(self.thread_num, *client_address)) 39 | 40 | 41 | if __name__ == "__main__": 42 | parser = argparse.ArgumentParser(description="简单的TCPServer!") 43 | parser.add_argument("--hostname", dest="hostname", default="0.0.0.0", metavar="IP", help="请输入监听的IP地址") 44 | parser.add_argument("--port", dest="port", type=int, default=8888, metavar="端口", help="请输入监听的端口") 45 | args = parser.parse_args() 46 | server = EchoServer(args.hostname, args.port) 47 | server.run() 48 | -------------------------------------------------------------------------------- /python/other/batch.py: -------------------------------------------------------------------------------- 1 | # 工作中遇到调用阿里云的接口有请求限制的情况, 每次请求只能发送20个IP地址, 如果我有超过20个的IP地址的话就需要分组请求了 2 | # 这里模拟一下分组的方法 3 | 4 | lst = 
["number_{0}".format(n) for n in range(1, 95)] 5 | 6 | for i in range(0, len(lst), 20): 7 | print(lst[i:i+20]) 8 | print("--" * 20) 9 | -------------------------------------------------------------------------------- /python/other/count_appear_times.py: -------------------------------------------------------------------------------- 1 | #使用不同的办法来统计列表中对象的出现次数。 2 | import collections 3 | 4 | 5 | global_list = [1,2,3,4,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,3,4,4,4] 6 | def way1(): 7 | list1 = set(global_list) #list1是另外一个列表,里面的内容是global_list里面的无重复项. 8 | for item in list1: 9 | print(item,'出现了',global_list.count(item),'次') 10 | way1() 11 | 12 | #利用dict的特性。 13 | def way2(): 14 | list2 = set(global_list) 15 | dict2 = {} 16 | for item in list2: 17 | dict2[item] = global_list.count(item) 18 | print(dict2) 19 | 20 | way2() 21 | 22 | #利用python第三方模块collections. 23 | def way3(): 24 | print(collections.Counter(global_list)) 25 | way3() 26 | -------------------------------------------------------------------------------- /python/other/random_strings.py: -------------------------------------------------------------------------------- 1 | import string 2 | import random 3 | 4 | def random_string(length=8): 5 | return "".join(random.choices(string.ascii_letters+string.digits, k=length)) 6 | 7 | if __name__ == "__main__": 8 | print(random_string(10)) 9 | -------------------------------------------------------------------------------- /python/process/daemon.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | #coding: utf-8 3 | import os 4 | import sys 5 | import time 6 | import atexit 7 | import signal 8 | 9 | 10 | """ 11 | Reference APUE 3rd 12 | coding rules: 13 | 1. Call umask to set the file mode creation mask to a known value, usually 0. 14 | 2. 15 | the parent of the user-level daemons is the init process 16 | 17 | """ 18 | 19 | 20 | 21 | class Daemon(object): 22 | """ 23 | A generic daemon class. 24 | https://gist.github.com/andreif/cbb71b0498589dac93cb 25 | Usage: subclass the Daemon class and override the run() method 26 | """ 27 | def __init__(self, pidfile, stdin="/dev/null", stdout="/dev/null", stderr="/dev/null"): 28 | self.stdin = stdin 29 | self.stdout = stdout 30 | self.stderr = stderr 31 | self.pidfile = pidfile 32 | 33 | def daemonize(self): 34 | """ 35 | do the UNIX double-fork magic. 
36 | """ 37 | # 调用umask修改子进程的新建文件权限掩码(比如umask是022,那么新建的文件为666-022=644,文件夹为777-022=755) 38 | # 这样子进程就不会受父进程的umask影响了,增加守护进程灵活性 39 | os.umask(0) 40 | # 创建子进程 41 | try: 42 | pid = os.fork() 43 | # os.fork会创建子进程,如果是在父进程环境中就会返回子进程的pid(一般不为0),如果在子进程的环境中则返回0 44 | # 如果是父进程环境,则让父进程退出,这样的话子进程就会被init托管而不受主进程的影响 45 | if pid > 0: 46 | sys.exit(0) 47 | except OSError, e: 48 | sys.stderr.write("Fork #1 failed: %d (%s)\n" % (e.errno, e.strerror)) 49 | sys.exit(1) 50 | # 让子进程脱离父进程的环境 51 | # 切换工作目录到根目录,防止占用可卸载的文件系统,也可以换成其它路径 52 | os.chdir("/") 53 | # 使用getsid创建一个新的会话(一个或多个进程组的集合),组长进程调用该函数会返回-1(报错),非组长进程调用返回进程组id;该进程会变成会话首进程,该进程会变成新进程组 54 | # 的组长进程,该进程没有控制终端,如果之前有,则会被中断;组长进程不能成为新会话首进程,新会话首进程必定会成为组长进程 55 | os.setsid() 56 | #在基于 System V的系统中,有人建议此时再次调用fork,并终止父进程,第二个子进程作为父进程继续运行,这样就保证了盖守护进程不会成为会话首进程, 57 | #会话的领头进程打开一个终端之后, 该终端就成为该会话的控制终端,意思是一个会话只有会话首进程才有可能控制终端; 58 | try: 59 | pid = os.fork() 60 | if pid > 0: 61 | sys.exit(0) 62 | except OSError, e: 63 | sys.stderr.write("fork #2 failed: %d (%s)\n" % (e.errno, e.strerror)) 64 | sys.exit(1) 65 | 66 | 67 | 68 | 69 | 70 | 71 | -------------------------------------------------------------------------------- /python/process/sub_process.py: -------------------------------------------------------------------------------- 1 | import traceback 2 | import subprocess 3 | 4 | def run_cmd(cmd): 5 | try: 6 | p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) 7 | p.wait(timeout=10) 8 | out = p.stdout.read() #type: str 9 | err = p.stderr.read() #type: str 10 | return p.returncode, out, err 11 | except subprocess.TimeoutExpired: 12 | print("执行代码超时: {0}".format(cmd)) 13 | except: 14 | print(traceback.print_exc()) 15 | finally: 16 | p.kill() #The child process is not killed if the timeout expires 17 | 18 | if __name__ == "__main__": 19 | cmd = "sleep 15" 20 | run_cmd(cmd) 21 | -------------------------------------------------------------------------------- /python/readme.md: -------------------------------------------------------------------------------- 1 | ### Learn Python By Coding 2 | 3 | #### Goal 4 | * Writing python code shorter, faster and more readable at the same time 5 | 6 | 7 | #### Function 8 | 9 | ```python 10 | # The default values are evaluated at the point of function definition in the defining scope. 11 | 12 | i = 5 13 | 14 | def func(arg=i): 15 | print arg 16 | 17 | i = 6 18 | 19 | func() 20 | 21 | will print 5 22 | 23 | 24 | 当使用可变的数据结构作为默认参数时应当注意: 25 | x = [] 26 | def func(a, L=x): 27 | L.append(a) 28 | return L 29 | 30 | print func(1) 31 | print func(2) 32 | print func(3) 33 | print x 34 | [1] 35 | [1, 2] 36 | [1, 2, 3] 37 | [1, 2, 3] 38 | 解读: 39 | 1. 函数默认参数的定义只在函数定义的时候执行一次,python参数的传递实际上是对象的引用,当参数是一个不可变的对象的时候,比如说字符串,函数会把参数的链接指向那个字符串,所以不管函数在哪执行,默认参数都已经做好引用了,就不会变了; 40 | ,如果默认参数是一个可变对象的话当函数定义以后,如例2,L就会引用定义函数时候的那个空数组,函数在下面被执行的时候修改的其实是那个数组,而L只是一个引用,所以就会出现上面的情况,这样的情况不是我们想要的,避免这种情况的办法是不要给函数传递可变的默认参数, 41 | ,当传递的参数不是默认参数的时候没有问题,因为每一次都传入一个新的引用: 42 | 43 | def func(a, L=None): 44 | if L is None: 45 | L = [] 46 | L.append(a) 47 | return L 48 | 这样就不会有问题了 49 | 50 | 2. non-keyword argument after a keyword argument;default argument must follows non-default argument; 51 | 52 | 53 | # If arguments are not available separately, write the function call with the *-operator to unpack the arguments out of a list or tuple: 54 | >>>args = [3, 6] 55 | >>>range(*args) 56 | [3, 4, 5] 57 | 58 | ``` 59 | 60 | *. 
求多个长度相同的list个对应元素之和(假设都是整数) 61 | 62 | ```python 63 | #之前比较年轻,使用遍历的笨办法,这次试用map 64 | 65 | x, y, z= range(1, 5), range(5, 9), range(9, 13) 66 | map(lambda a,b,c: a+b+c, x, y, z) 67 | 68 | [15, 18, 21, 24] 69 | ``` 70 | 71 | *. 列出1-20之间能被3或者5整除的数字 72 | ``` 73 | def f(x): return x % 3 == 0 or x % 5 == 0 74 | filter(f, range(1, 20)) 75 | 76 | [3, 5, 9, 10, 12, 15, 18, 20] 77 | 78 | ``` 79 | *. 求一个整数list的所有元素的积 80 | ``` 81 | reduce(lambda x, y: x*y, [1,2,3,4,5]) 82 | 83 | 120 84 | 85 | ``` 86 | * 列表推导式 87 | ``` 88 | >>> [(x, y) for x in [1,2,3] for y in [3,1,4] if x != y] 89 | [(1, 3), (1, 4), (2, 3), (2, 1), (2, 4), (3, 1), (3, 4)] 90 | 91 | >>> {x for x in [1,2,3,1,2,3]} 92 | set([1, 2, 3]) 93 | 94 | ``` 95 | 96 | * zip使用方法 97 | ``` 98 | x = ["Rocky", "Victor", "Huan"] 99 | y = [26, 24, 25, 100] 100 | zip(x, y) 101 | [("Rocky", 26), ("Victor", 24), ("Huan", 25)] 102 | 103 | dict(zip(x, y)) 104 | {"Rocky": 26, "Victor": 24, "Huan": 25} 105 | 106 | for k, v in zip(x, y): 107 | print k, v 108 | 109 | Rocky 26 110 | Victor 24 111 | Huan 25 112 | 113 | ``` 114 | 115 | * enumerate 116 | ``` 117 | >>> x = ["Rocky", "Victor", "Huan"] 118 | >>> for k,v in enumerate(x): 119 | ... print k,v 120 | 121 | 0 Rocky 122 | 1 Victor 123 | 2 Huan 124 | 125 | ``` 126 | 127 | * assign the result of a comparison or other Boolean expression to a variable 128 | ``` 129 | 工作中经常会遇到这样一种情况,有三个变量x,y,z,如果x符合条件(这里以是否为空来测试)就返回x,然后再看看y是否符合,如果符合就返回,再看看z 130 | 一般的做法是: 131 | if x: 132 | return x 133 | elif y: 134 | return x 135 | elif z: 136 | return z 137 | 138 | 其实可以更简洁: 139 | r = x or y or z 140 | return r 141 | 142 | 143 | ``` 144 | 145 | * 三目运算符 146 | 147 | ``` 148 | x = "A" if 1>0 else "B" 149 | "A" 150 | 151 | y = 1/0 if 0 else 1 152 | 1 # 三目运算符只有当第一个条件为真的时候才会去运算1/0,所以这个表达式不会报错 153 | ``` 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | #### Sort 162 | 163 | 164 | sorted和list的内置方法sort比较: 165 | 166 | ``` 167 | * sorted返回一个新的已经排好序的list而不改变原有的list,sort方法会改变原来的数据,如果源数据不需要的话可以使用sort方法 168 | * 从python2.2开始sort和sorted都是稳定的排序 169 | ``` 170 | 171 | * 按照年龄倒排 172 | 173 | ```python 174 | 方法一: 175 | >>> home = [{"name": "Rocky", "age": 26},{"name": "Victor", "age": 24}, {"name": "Father", "age": 53}] 176 | >>> print sorted(home, key=lambda person: person["age"], reverse=True) 177 | [{'age': 53, 'name': 'Father'}, {'age': 26, 'name': 'Rocky'}, {'age': 24, 'name': 'Victor'}] 178 | 179 | 方法二: 180 | >>>from operator import itemgetter 181 | >>>print sorted(home, key=itemgetter("age"), reverse=True) 182 | [{'age': 53, 'name': 'Father'}, {'age': 26, 'name': 'Rocky'}, {'age': 24, 'name': 'Victor'}] 183 | ``` 184 | 185 | * 按照列表中字符串所含有的感叹号的数量排序 186 | 187 | ```python 188 | >>>from operator import methodcaller 189 | >>>messages = ['critical!!!', 'hurry!', 'standby', 'immediate!!'] 190 | >>>sorted(messages, key=methodcaller("count", "!")) 191 | ['standby', 'hurry!', 'immediate!!', 'critical!!!'] 192 | ``` 193 | 194 | * 体重降序 年龄升序(体重为主,年龄为次) 195 | 196 | ```python 197 | #有主次的话先比较次优先级的然后比较主优先级的 198 | >>>home = [["Rocky", 26, 75], ["Victor", 24, 75], ["tony", 32, 70]] 199 | >>>from operator import itemgetter 200 | >>> s = sorted(home, key=itemgetter(1)) 201 | >>> s 202 | [['Victor', 24, 75], ['Rocky', 26, 75], ['tony', 32, 70]] 203 | >>> sorted(s, key=itemgetter(2)) 204 | [['tony', 32, 70], ['Victor', 24, 75], ['Rocky', 26, 75]] 205 | ``` 206 | 207 | * 给如下的list排序(不区分大小写) 208 | 209 | ```python 210 | x = ["a", "b", "C", "D"] 211 | 212 | sorted(x) 213 | ["C", "D", "a", "b"] #sorted对于字母/单词的默认排序顺序是按照字母的ASCII表,大写字母都小于小写字母 214 | 方法一: 215 | 
sorted接收一个cmp参数,这个参数是传一个拥有两个参数的函数,比如说x, y,如果x小于y的话返回负数一般为-1,x大于y的话返回正数一般为1,相等的话返回0 216 | 我们可以重写这个函数,如下 217 | def cmp(x, y): 218 | if x.lower() < y.lower(): 219 | return -1 220 | elif x.lower() > y.lower(): 221 | return 1 222 | return 0 223 | 224 | sorted(x, cmp=cmp) 225 | ["a", "b", "C", "D"] #如果要倒序的话可以让上面的函数相反地返回,该返回-1的返回1即可或者使用reverse参数为True即可 226 | 227 | 方法二(推荐): 228 | To use key= custom sorting, remember that you provide a function that takes one value and returns the proxy value to guide the sorting. 229 | 230 | sorted(x, key=str.lower) 231 | ["a", "b", "C", "D"] 232 | 233 | ``` 234 | 235 | #### data structures 236 | 237 | * List 238 | 239 | ```python 240 | 1. list append quivalent a[len(a):] = [x] 241 | 242 | >>> lst = [1,2,3,4] 243 | >>>lst[len(lst):] = [5,6] 244 | >>>lst 245 | >>>[1,2,3,4,5,6] 246 | >>>lst[:2] = [9,8] 247 | >>>lst 248 | [9,8,3,4,5,6] 249 | >>>del lst[1:4] 250 | >>> lst 251 | [9,5,6] 252 | 253 | ``` 254 | 255 | * Tuple 256 | 257 | ```python 258 | >>>x = "Rocky" 259 | >>>len(x) 260 | 5 261 | >>>x = "Rocky", # 注意后面有一个逗号 262 | >>>len(x) 263 | 1 264 | >>>x = 1, 2, "Hi" 265 | >>>x 266 | (1, 2, "Hi") 267 | >>>a, b, c = x 268 | >>>a, b, c 269 | 1, 2, "Hi" 270 | 271 | >>>(a, b), c = "XY", "Z" 272 | >>>a,b,c 273 | X Y Z 274 | 275 | >>> a,b,*c = range(5) 276 | >>> a,b,c 277 | (0, 1, [2, 3, 4]) 278 | >>> a,*b,c = range(5) 279 | >>> a,b,c 280 | (0, [1, 2, 3], 4) 281 | >>> 282 | 283 | >>> t = [("Rocky", 26), ("Tony", 32)] 284 | >>> for name, age in t: 285 | ... print(name,age) 286 | ... 287 | ... 288 | Rocky 26 289 | Tony 32 290 | ``` 291 | 292 | * String 293 | 294 | ```python 295 | >>>x = 123.4567 296 | >>>y = 98.76543 297 | >>>"{0:.2f} {1:.2f}".format(x, y) 298 | 123.45 98.76 299 | 300 | >>> name = "Rocky" 301 | >>> age = 26 302 | >>> "{0:10} ==> {1:10d}".format(name, age) 303 | 'Rocky ==> 26' 304 | >>>"{0:.2f}s vs {1:.3f}s".format(1.234, 1.234) 305 | 1.23s vs 1.234s 306 | ``` 307 | 308 | * dict 309 | 310 | ```python 311 | * 把一个list变成一个dict并附件默认值 312 | >>>x = ["Rocky", "Tony", "Abc"] 313 | >>>y = dict.fromkeys(x, "www") 314 | >>>y 315 | {"Rocky": "www", "Tony": "www", "Abc": "www"} 316 | 317 | ``` 318 | 319 | 320 | #### Built-in Functions 321 | 322 | * issubclass(A, B) 或者 issubclass(A, (B, C)) (B, C是或的关系) 323 | ``` 324 | 判断某一个类是不是另一个类的子类,比如: 325 | from collections import defaultdict 326 | issubclass(defaultdict, dict) 327 | True 328 | 329 | ``` 330 | 331 | 332 | 333 | 334 | 335 | 336 | 337 | 338 | 339 | 340 | 341 | -------------------------------------------------------------------------------- /python/system/hosts.py: -------------------------------------------------------------------------------- 1 | #coding: utf-8 2 | import pprint 3 | 4 | 5 | class DuplicateKeyError(Exception): 6 | pass 7 | 8 | class Hosts: 9 | def __init__(self, path="/etc/hosts"): 10 | self.path = path 11 | self.ip_map_host = {} # ip和host的对应关系 12 | self.host_map_ip = {} # host和ip的对应关系 13 | self.unprocessed = [] # 不可以处理的hosts行, 比如ipv6, 或者注释的行, 这里为了不修改这些内容 14 | self.host2obj() 15 | 16 | def host2obj(self): 17 | """ 18 | 将hosts文件转换为python对象, 方便操作, 字典,value为set 19 | {"192.168.1.1": {"rockywu.me", "example.com"}} 20 | """ 21 | flines = [] 22 | with open(self.path) as f: 23 | lines = f.readlines() 24 | for fl in lines: 25 | if fl.startswith("#") or fl.startswith("::"): 26 | self.unprocessed.append(fl) 27 | elif fl: 28 | flines.append(fl.strip()) 29 | 30 | for fline in flines: 31 | if fline: 32 | ip = fline.strip().split()[0] 33 | hosts = fline.strip().split()[1:] 34 | # 添加到ip_map_host中 35 | if ip in 
self.ip_map_host: 36 | for host in hosts: 37 | self.ip_map_host[ip].add(host) 38 | else: 39 | self.ip_map_host[ip] = set(hosts) 40 | # 添加到host_map_ip中 41 | for host in hosts: 42 | if host not in self.host_map_ip: 43 | self.host_map_ip[host] = ip 44 | else: 45 | print(host) 46 | raise DuplicateKeyError("一个域名不能解析到多个IP地址!") 47 | #pprint.pprint(self.host_map_ip) 48 | #pprint.pprint(self.ip_map_host) 49 | 50 | def set(self, ip, host): 51 | if ip in self.ip_map_host: 52 | if host in self.host_map_ip: 53 | if self.host_map_ip[host] == ip: 54 | return 55 | else: 56 | self.ip_map_host[self.host_map_ip[host]].remove(host) 57 | self.ip_map_host[ip].add(host) 58 | else: 59 | self.ip_map_host[ip].add(host) 60 | else: 61 | if host in self.host_map_ip: 62 | self.ip_map_host[self.host_map_ip[host]].remove(host) 63 | self.ip_map_host[ip] = set() 64 | self.ip_map_host[ip].add(host) 65 | else: 66 | self.ip_map_host[ip] = set() 67 | self.ip_map_host[ip].add(host) 68 | 69 | def save(self): 70 | """保存obj到原hosts文件中""" 71 | flines = [] 72 | for ip, host in self.ip_map_host.items(): 73 | if host: 74 | flines.append(ip + " " + " ".join(host) + "\n") 75 | 76 | with open(self.path, "w") as f: 77 | f.writelines(flines) 78 | if self.unprocessed: 79 | f.writelines(self.unprocessed) 80 | 81 | 82 | if __name__ == "__main__": 83 | host = Hosts(path="hosts") 84 | host.set(ip="1.1.1.1", host="wufeiqun.com") 85 | host.save() 86 | -------------------------------------------------------------------------------- /script/README.md: -------------------------------------------------------------------------------- 1 | #### 说明 2 | 3 | 这里的脚本都是服务器维护常用的脚本, 启动/停止某些服务等. 直接把这个目录放到服务器上就可以直接使用了 4 | -------------------------------------------------------------------------------- /script/change_hostname.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 工作上有些场景需要把服务器的hostname修改成ip地址, 以便大数据采集日志需要, 写了这个脚本, 适应不同的系统 3 | import sys 4 | import socket 5 | import fcntl 6 | import struct 7 | import platform 8 | import socket 9 | import subprocess 10 | 11 | def get_ip_address(ifname): 12 | s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) 13 | return socket.inet_ntoa(fcntl.ioctl(s.fileno(), 0x8915, struct.pack('256s', ifname[:15]))[20:24]) 14 | 15 | def get_sys_version(): 16 | version = platform.platform() 17 | if "el6" in version: 18 | return "el6" 19 | elif "el7" in version: 20 | return "el7" 21 | elif "Ubuntu": 22 | return "ubuntu" 23 | 24 | def change(): 25 | version = get_sys_version() 26 | ip = get_ip_address("eth0") 27 | ip_new = ip.replace(".", "_") 28 | if version == "el6": 29 | subprocess.call(["hostname", ip_new]) 30 | subprocess.call(["cp", "/etc/sysconfig/network", "/tmp/network.bak"]) 31 | with open("/etc/sysconfig/network", "r") as f: 32 | ret = f.readlines() 33 | ret[1] = "HOSTNAME={0}\n".format(ip_new) 34 | with open("/etc/sysconfig/network", "w") as f: 35 | f.writelines(ret) 36 | elif version == "el7": 37 | subprocess.call(["hostnamectl", "--static", "set-hostname", ip_new]) 38 | subprocess.call(["hostnamectl", "set-hostname", ip_new]) 39 | elif version == "ubuntu": 40 | subprocess.call(["hostnamectl", "--static", "set-hostname", ip_new]) 41 | subprocess.call(["hostnamectl", "set-hostname", ip_new]) 42 | 43 | 44 | def show(): 45 | ip = get_ip_address("eth0") 46 | version = get_sys_version() 47 | ip_new = ip.replace(".", "_") 48 | print("ip={0}, version={1}, ip_new={2}, hostname={3}".format(ip, version, ip_new, socket.gethostname())) 49 | 50 | if __name__ == 
"__main__": 51 | change() 52 | show() 53 | -------------------------------------------------------------------------------- /script/nginx: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | nginxd=/home/rocky/nginx/sbin/nginx 3 | nginx_config=/home/rocky/nginx/conf/nginx.conf 4 | nginx_pid=/home/rocky/nginx/logs/nginx.pid 5 | nginx_lock=/home/rocky/nginx/logs/nginx.lock 6 | 7 | 8 | # Start nginx. 9 | start() { 10 | if [ -f $nginx_lock ] && [ -f $nginx_pid ];then 11 | echo "nginx already running...." 12 | return 0 13 | fi 14 | echo $"Starting nginx..." 15 | $nginxd -c $nginx_config 16 | RETVAL=$? 17 | if [ $RETVAL = 0 ];then 18 | touch $nginx_lock 19 | echo "nginx started!" 20 | return $RETVAL 21 | else 22 | echo "nginx start failed!" 23 | return 1 24 | fi 25 | } 26 | 27 | # Stop nginx. 28 | stop() { 29 | if [ ! -f $nginx_lock ] && [ ! -f $nginx_pid ];then 30 | echo "nginx already stopped...." 31 | return 0 32 | fi 33 | echo $"Stopping nginx..." 34 | $nginxd -s stop 35 | RETVAL=$? 36 | if [ $RETVAL = 0 ];then 37 | rm -f $nginx_lock $nginx_pid 38 | echo "nginx stopped!" 39 | return $RETVAL 40 | else 41 | echo "nginx stop failed!" 42 | return 1 43 | fi 44 | } 45 | 46 | configtest() { 47 | $nginxd -t -c $nginx_config 48 | } 49 | 50 | # Reload nginx. 51 | reload() { 52 | echo $"Reloading nginx... " 53 | $nginxd -s reload 54 | RETVAL=$? 55 | return $RETVAL 56 | } 57 | 58 | 59 | status() { 60 | if [ -e $nginx_lock ];then 61 | echo "nginx already running...." 62 | exit 0 63 | fi 64 | echo "nginx is stop!" 65 | } 66 | 67 | # See how we were called. 68 | case "$1" in 69 | start) 70 | start 71 | ;; 72 | stop) 73 | stop 74 | ;; 75 | reload) 76 | reload 77 | ;; 78 | restart) 79 | stop 80 | start 81 | ;; 82 | status) 83 | status 84 | ;; 85 | *) 86 | echo $"Usage: $0 {start|stop|status|restart|reload}" 87 | exit 2 88 | esac 89 | 90 | exit $RETVAL 91 | -------------------------------------------------------------------------------- /shell/init_centos7.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #This is a script to initialize the centos7 system,just for my flavor,you can modifiy it according your need. 3 | 4 | #install epel-release and update. 5 | yum install epel-release -y && yum update -y 6 | yum -y groupinstall 'Development tools' 7 | yum install -y gcc gcc-c++ make autoconf automake libtool make pcre-devel openssl-devel bison-devel patch unzip ncurses-devel zlib-devel bzip2-devel wget readline-devel sqlite-devel telnet nc git htop psmisc net-tools 8 | 9 | #disable selinux 10 | if [ ! 
-f "/etc/selinux/config" ]; then 11 | sed -i 's/SELINUX=enforcing/SELINUX=disabled/g' /etc/selinux/config 12 | fi 13 | setenforce 0 14 | 15 | # stop firewall 16 | systemctl stop firewalld.service 17 | systemctl disable firewalld.service 18 | 19 | #change default file decorators 20 | if [ -f "/etc/security/limits.d/20-nproc.conf" ]; then 21 | 22 | cat > /etc/security/limits.d/20-nproc.conf << EOF 23 | * soft nofile 65535 24 | * hard nofile 65535 25 | * soft nproc 65535 26 | * hard nproc 65535 27 | EOF 28 | else 29 | cat >> /etc/security/limits.conf << EOF 30 | * soft nofile 65535 31 | * hard nofile 65535 32 | * soft nproc 65535 33 | * hard nproc 65535 34 | EOF 35 | fi 36 | #stop ctl-alt-del function 37 | rm -f /usr/lib/systemd/system/ctrl-alt-del.target 38 | 39 | #system characters 40 | cat > /etc/locale.conf << EOF 41 | LANG="en_US.UTF-8" 42 | EOF 43 | 44 | # Set SSHD config 45 | 46 | #optimized kenel 47 | 48 | #Use aliyun repo 49 | http://mirrors.aliyun.com/help/centos 50 | 51 | #adduser rocky 52 | useradd rocky 53 | chmod +w /etc/sudoers 54 | echo "rocky ALL=(ALL)NOPASSWD: ALL" >>/etc/sudoers 55 | chmod -w /etc/sudoers 56 | 57 | -------------------------------------------------------------------------------- /shell/logrotate.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # nginx日志切割样例, 线上nginx最好使用yum来安装 3 | 4 | log_name="access.log" 5 | log_path="/var/log/nginx/" 6 | backup_path="/data/" 7 | pid_path="/var/run/nginx.pid" 8 | # 按天备份日志 9 | #log_name_new="${log_name}_$(date -d "today -1 day" +"%Y%m%d").log" 10 | #按小时备份日志, 日志名字为2018012401的日志包括了1点到2点之间的日志 11 | log_name_new="${log_name}_$(date -d "today -1 hour" +"%Y%m%d%H").log" 12 | # 备份目录不存在的话创建 13 | if [ ! -x "$backup_path" ]; then 14 | mkdir "${backup_path}" 15 | fi 16 | 17 | # 备份 18 | mv ${logs_path}/${log_name} ${backup_path}/${log_name_new} 19 | 20 | # 压缩 21 | gzip -f ${backup_path}/${log_name_new} 22 | 23 | # nginx重新打印日志 24 | kill -USR1 `cat ${pid_path}` 25 | 26 | # 删除历史日志 27 | find ${backup_path} -maxdepth 1 -name "*.gz" -mtime +30 |xargs rm -f 28 | -------------------------------------------------------------------------------- /shell/mysql_install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | DATE=`date "+%Y%m%d %H:%M:%S"` 3 | MYSQL_TAR='/root/software/mysql-5.5.42' 4 | MYSQL_DIR=/home/test/mysql 5 | RPMLIST="make gcc gcc-c++ autoconf automake bison bison-devel ncurses ncurses-devel zlib* libxml* libtool-ltdl-devel* cmake" 6 | #create mysql user ! 7 | if [[`ls /home|grep test` = test ]];then 8 | echo "user test is exsit!" 9 | else 10 | useradd -d /home/test test 11 | fi 12 | #create mysql directory. 13 | cd /home/test 14 | mkdir mysql mysql/data mysql/etc mysql/tmp mysql/var mysql/log script 15 | chown -R test:test /home/test/ 16 | 17 | 18 | #check and uninstall the old version! 19 | echo '----------check and uninstall the old version mysql----------' 20 | sleep 1 21 | rpm -qa |grep mysql > /tmp/mysqlremove.txt 22 | if [ $? -eq 0 ];then 23 | for i in $(cat /tmp/mysqlremove.txt); do rpm -e --nodeps $i; done 24 | echo -e "$DATE \033[32m MYSQL already removed \033[0m" >> /home/test/mysql_install.log 25 | else 26 | echo -e "$DATE \033[32m MYSQL does not exist \033[0m" >> /home/test/mysql_install.log 27 | fi 28 | 29 | 30 | #install some dependent softwares! 
31 | sleep 1 32 | 33 | yum -y install $RPMLIST 34 | rpm -q --qf '%{NAME}-%{VERSION}-%{RELEASE} (%{ARCH})\n' make gcc gcc-c++ autoconf automake bison bison-devel ncurses ncurses-devel zlib* libxml* libtool-ltdl-devel* cmake 35 | 36 | #download the mysql tar file. 37 | cd /root/software 38 | wget http://mirrors.sohu.com/mysql/MySQL-5.5/mysql-5.5.42.tar.gz 39 | #install mysql! 40 | echo '----------解压部分开始----------' 41 | sleep 1 42 | tar -zxvf $MYSQL_TAR.tar.gz -C /root/software/ 43 | echo 'tar -xf already ----> ok' >> /home/test/mysql_install.log 44 | sleep 5 45 | if [ -d $MYSQL_TAR ];then 46 | cd $MYSQL_TAR 47 | sleep 2 48 | fi 49 | echo '----------重头戏装包开始----------' 50 | sleep 1 51 | if [ -f $MYSQL_TAR/CMakeCache.txt ]; then 52 | echo 你已经装好了一个数据库顶多是没有执行 请先启动正常使用如有问题执行 ./mysql_install_db --user=test --basedir=/home/test/mysql --datadir=/home/test/mysql/data 执行上一条命令如果还是不行请删除你现有的数据库再执行该脚本 53 | else 54 | cd $MYSQL_TAR 55 | cmake \ 56 | -DCMAKE_INSTALL_PREFIX=/home/test/mysql \ 57 | -DMYSQL_DATADIR=/home/test/mysql/data \ 58 | -DSYSCONFDIR=/home/test/mysql/etc \ 59 | -DWITH_MYISAM_STORAGE_ENGINE=1 \ 60 | -DWITH_INNOBASE_STORAGE_ENGINE=1 \ 61 | -DWITH_MEMORY_STORAGE_ENGINE=1 \ 62 | -DWITH_READLINE=1 \ 63 | -DMYSQL_UNIX_ADDR=/home/test/mysql/tmp/mysql.sock \ 64 | -DMYSQL_TCP_PORT=3306 \ 65 | -DENABLED_LOCAL_INFILE=1 \ 66 | -DWITH_PARTITION_STORAGE_ENGINE=1 \ 67 | -DEXTRA_CHARSETS=all \ 68 | -DDEFAULT_CHARSET=utf8 \ 69 | -DDEFAULT_COLLATION=utf8_general_ci 70 | 71 | fi 72 | 73 | echo '----------start make----------' 74 | sleep 1 75 | make 76 | sleep 5 77 | echo '----------start make install----------' 78 | sleep 5 79 | make install 80 | 81 | cp $MYSQL_DIR/bin/mysql /usr/bin/ 82 | cp $MYSQL_DIR/bin/mysqldump /usr/bin/ 83 | cp $MYSQL_DIR/support-files/my-huge.cnf $MYSQL_DIR/etc/my.cnf 84 | cp $MYSQL_DIR/etc/my.cnf $MYSQL_DIR/etc/my.cnf.bak 85 | /home/test/mysql/scripts/mysql_install_db --user=test --basedir=/home/test/mysql --datadir=/home/test/mysql/data --skip-name-resolve --defaults-file=$MYSQL_DIR/etc/my.cnf 86 | chown -R test:test /home/test/mysql 87 | #startup the mysqli 88 | echo " #!/bin/bash ">/home/test/script/mysql_start.sh 89 | echo "/home/test/mysql/bin/mysqld --defaults-file=/home/test/mysql/etc/my.cnf &">>/home/test/script/mysql_start.sh 90 | chmod +x /home/test/script/mysql_start.sh 91 | chown -R test:test /home/test 92 | su - test -c bash /home/test/script/mysql_start.sh 93 | echo "----------mysql successfully installed!----------" 94 | echo "-----Please run the /home/test/mysql/bin/mysql_secure_installation to init mysql!-----" 95 | 96 | 97 | -------------------------------------------------------------------------------- /shell/nginx_install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #owner:rocky 3 | #date:201700908 4 | USER=rocky 5 | GROUP=rocky 6 | DOWN=/home/rocky/software 7 | NGINX=/home/rocky/nginx 8 | FILENAME=nginx-1.12.1 9 | SCRIPT=/home/rocky/script 10 | 11 | #first step,check the directory if exist! 12 | 13 | if [ ! -x $DOWN ] ;then 14 | mkdir $DOWN 15 | fi 16 | 17 | if [ ! -x $SCRIPT ] ;then 18 | mkdir $SCRIPT 19 | fi 20 | 21 | cd $DOWN 22 | wget http://nginx.org/download/$FILENAME.tar.gz 23 | tar -zxf $FILENAME.tar.gz -C 24 | echo "---------------download nginx successfully-------------" 25 | #compile nginx! 
26 | cd $DOWN/$FILENAME 27 | ./configure \ 28 | --prefix=$NGINX \ 29 | --sbin-path=$NGINX/sbin/nginx 30 | --conf-path=$NGINX/conf/nginx.conf \ 31 | --pid-path=$NGINX/logs/nginx.pid \ 32 | --with-http_ssl_module \ 33 | --user=$USER \ 34 | --group=$GROUP \ 35 | 36 | make 37 | make install 38 | 39 | #start the nginx with qfpay user. 40 | cd /home/$USER/nginx/sbin 41 | chown root nginx 42 | chmod u+s nginx 43 | echo "--------------------nginx installation successful-------------------------------" 44 | 45 | cd $NGINX 46 | mkdir conf.d 47 | 48 | -------------------------------------------------------------------------------- /shell/php-fpm: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # nginx Startup script for the PHP-FPM HTTP Server 3 | # 4 | # chkconfig: 345 85 15 5 | # description: PHP is an HTML-embedded scripting language 6 | # processname: php-fpm 7 | # config: /usr/local/mpsrv/php/etc/php.ini 8 | 9 | # Source function library. 10 | . /etc/rc.d/init.d/functions 11 | 12 | PHP_PATH=/home/git/ 13 | DESC="php-fpm daemon" 14 | NAME=php-fpm 15 | # php-fpm路径 16 | DAEMON=$PHP_PATH/php/sbin/$NAME 17 | # 配置文件路径 18 | CONFIGFILE=$PHP_PATH/php/etc/php-fpm.conf 19 | # PID文件路径(在php-fpm.conf设置) 20 | PIDFILE=$PHP_PATH/php/var/run/$NAME.pid 21 | SCRIPTNAME=/etc/init.d/$NAME 22 | 23 | # Gracefully exit if the package has been removed. 24 | test -x $DAEMON || exit 0 25 | 26 | rh_start() { 27 | $DAEMON -y $CONFIGFILE || echo -n " already running" 28 | } 29 | 30 | rh_stop() { 31 | kill -QUIT `cat $PIDFILE` || echo -n " not running" 32 | } 33 | 34 | rh_reload() { 35 | kill -USR2 `cat $PIDFILE` || echo -n " can't reload" 36 | #kill -HUP `cat $PIDFILE` || echo -n " can't reload" 37 | } 38 | 39 | case "$1" in 40 | start) 41 | echo -n "Starting $DESC: $NAME" 42 | rh_start 43 | echo "." 44 | ;; 45 | stop) 46 | echo -n "Stopping $DESC: $NAME" 47 | rh_stop 48 | echo "." 49 | ;; 50 | reload) 51 | echo -n "Reloading $DESC configuration..." 52 | rh_reload 53 | echo "reloaded." 54 | ;; 55 | restart) 56 | echo -n "Restarting $DESC: $NAME" 57 | rh_stop 58 | sleep 1 59 | rh_start 60 | echo "." 61 | ;; 62 | *) 63 | echo "Usage: $SCRIPTNAME {start|stop|restart|reload}" >&2 64 | exit 3 65 | ;; 66 | esac 67 | exit 0 68 | -------------------------------------------------------------------------------- /shell/python_install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #install python3.6 to /home/rocky/python3 3 | #for Centos7 4 | #owner:rocky 5 | #date:2017.09 6 | 7 | DOWN=/home/rocky/software 8 | PREFIX=/home/rocky/python3 9 | VERSION=3.6.2 10 | URL=https://www.python.org/ftp/python/$VERSION/Python-$VERSION.tgz 11 | 12 | if [ ! -d $DOWN ];then 13 | mkdir $DOWN 14 | fi 15 | 16 | if [ ! -d $PREFIX ];then 17 | mkdir $PREFIX 18 | fi 19 | 20 | cd $DOWN 21 | wget $URL 22 | 23 | #tar and compile python. 24 | tar -zxf Python-$VERSION.tgz 25 | cd Python-$VERSION 26 | ./configure --prefix=$PREFIX 27 | make && make install 28 | 29 | -------------------------------------------------------------------------------- /software/README.md: -------------------------------------------------------------------------------- 1 | #### 说明 2 | 3 | --- 4 | 5 | 这里备份着一些经常使用而又不怎么好安装的软件, 主要是CentOS 7上面的. 
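
For example, the bundled axel packages can be installed straight from this directory with a local install — a sketch, assuming the el7 build matches the target system (use the el6 file on CentOS 6):

```bash
# install the bundled axel rpm locally, resolving dependencies from the configured repos
yum localinstall -y axel-2.4-9.el7.x86_64.rpm
```
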
6 | -------------------------------------------------------------------------------- /software/axel-2.4-1.el6.rf.x86_64.rpm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wufeiqun/LearnByCoding/4fc2c11601922b418b864820cb29bbb98fb65d29/software/axel-2.4-1.el6.rf.x86_64.rpm -------------------------------------------------------------------------------- /software/axel-2.4-9.el7.x86_64.rpm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wufeiqun/LearnByCoding/4fc2c11601922b418b864820cb29bbb98fb65d29/software/axel-2.4-9.el7.x86_64.rpm --------------------------------------------------------------------------------