├── .gitignore ├── Docker ├── Dockerfile └── requirements.txt ├── MyShow ├── GetData_zhihu.py ├── MyShow.py ├── static │ ├── css │ │ └── dashboard.css │ ├── favicon.ico │ └── js │ │ ├── bootstrap-typeahead.min.js │ │ ├── bootstrap.min.js │ │ ├── echarts.min.js │ │ └── jquery-3.0.0.min.js └── templates │ ├── base.html │ ├── error.html │ └── index.html ├── README.md ├── Text ├── Obama.txt ├── Walden.txt └── Zarathustra.txt ├── python_aiohttp.py ├── python_base.py ├── python_context.py ├── python_coroutine.py ├── python_csv.py ├── python_datetime.py ├── python_decorator.py ├── python_flask.py ├── python_functional.py ├── python_lda.py ├── python_magic_methods.py ├── python_markov_chain.py ├── python_metaclass.py ├── python_numpy.py ├── python_oneline.py ├── python_requests.py ├── python_restful_api.py ├── python_spider.py ├── python_sqlalchemy.py ├── python_thread_multiprocess.py ├── python_version36.py ├── python_visual.py ├── python_visual_animation.py ├── python_wechat.py └── python_weibo.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | *.[oa] 3 | *.py[co] 4 | *.exe 5 | *.log 6 | *.out 7 | *.dat 8 | *.temp 9 | *.png 10 | *.pkl 11 | *.mp3 12 | *.mp4 13 | *.csv 14 | .* 15 | 16 | test/ 17 | data/ 18 | build/ 19 | captcha.jpeg 20 | -------------------------------------------------------------------------------- /Docker/Dockerfile: -------------------------------------------------------------------------------- 1 | # Dockerfile by xianhu: build a docker image 2 | # centos6: 3 | # docker build -t user/centos6 . 4 | # docker run -it --name test [-p -v] user/centos6 5 | # docker attach test 6 | # centos7: 7 | # docker build -t user/centos7 . 
#     docker run -dt --privileged --name test [-p -v] user/centos7
#     docker exec -it test /bin/bash

FROM centos:6.9
# MAINTAINER is deprecated; a label carries the same information.
LABEL maintainer="xianhu"

# change system environments
ENV LANG=en_US.UTF-8 \
    LC_ALL=en_US.UTF-8

# change system local time
RUN ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime

# fix: warning: rpmts_HdrFromFdno
RUN rpm --import /etc/pki/rpm-gpg/RPM*

# Update yum, add the extra repositories (IUS for python36u, EPEL for nginx),
# install everything and clean the yum cache in ONE layer. A "yum clean all"
# in a later layer cannot shrink the layers created before it.
# centos7: use https://centos7.iuscommunity.org/ius-release.rpm instead.
RUN yum update -y \
 && yum install -y \
        xz \
        vim \
        git \
        gcc \
        make \
        wget \
        screen \
        gcc-c++ \
        crontabs \
        zlib-devel \
        sqlite-devel \
        openssl-devel \
 && yum install -y \
        https://centos6.iuscommunity.org/ius-release.rpm \
        epel-release \
 && yum install -y \
        python36u \
        python36u-pip \
        python36u-devel \
        nginx \
 && yum clean all

# install libs of python3; the requirements file is removed in the same layer
# that consumed it, and pip's download cache is never written to the image
COPY ./requirements.txt /root/
WORKDIR /root/
RUN pip3.6 install --no-cache-dir --upgrade pip \
 && pip3.6 install --no-cache-dir -r requirements.txt \
 && rm -rf /root/*

# centos6
CMD ["/bin/bash"]

# centos7
# ENTRYPOINT /usr/sbin/init
def get_all_topics(db_cursor=None):
    """Return the (topic_id, topic_name) rows of every topic that has children.

    Rows whose topic id is NULL, empty or only whitespace are dropped.

    :param db_cursor: optional DB-API cursor to query with; when omitted the
        module-level ``cursor`` is used, as before.
    :return: list of ``(t_topic_id, t_topic_name)`` rows.
    """
    db_cursor = cursor if db_cursor is None else db_cursor
    db_cursor.execute("select distinct t_topic_id, t_topic_name from t_zhihutopics where t_topic_haschildren = 1;")
    # ``row[0] and ...`` guards against NULL ids, which have no .strip()
    return [row for row in db_cursor.fetchall() if row[0] and row[0].strip()]


def get_topic_data(topic_id, topic_name, db_cursor=None):
    """Build the force-graph description of a topic and all of its descendants.

    The topic tree is walked breadth-first starting at ``topic_id``. Every
    topic is emitted once, attached to the first parent that reaches it, so
    the result is deterministic and the walk terminates even if the stored
    parent links contain a cycle.

    :param topic_id: id of the root topic (node level 0).
    :param topic_name: display name of the root topic.
    :param db_cursor: optional DB-API cursor to query with; when omitted the
        module-level ``cursor`` is used, as before.
    :return: dict with keys ``"type"`` (always ``"force"``), ``"nodes"``
        (dicts with ``id``/``name``/``level``; level 1 for direct children of
        the root, 2 for anything deeper) and ``"links"`` (dicts with
        ``source``/``target``).
    """
    from collections import deque

    db_cursor = cursor if db_cursor is None else db_cursor
    data_dict = {
        "type": "force",
        "nodes": [
            {"id": topic_id, "name": topic_name, "level": 0},
        ],
        "links": [],
    }

    seen_ids = {topic_id}
    # FIFO queue instead of set.pop(): the expansion order (and therefore the
    # parent a multi-parent topic is linked to) no longer depends on hashing
    pending_ids = deque([topic_id])
    while pending_ids:
        db_cursor.execute("select * from t_zhihutopics where t_topic_parentid = %s;", [pending_ids.popleft()])
        # rows are unpacked positionally: the table is expected to have six columns
        for _, t_id, t_name, t_pid, t_haschild, _ in db_cursor.fetchall():
            if t_id in seen_ids:
                continue
            seen_ids.add(t_id)
            data_dict["nodes"].append({"id": t_id, "name": t_name, "level": 1 if t_pid == topic_id else 2})
            data_dict["links"].append({"source": t_pid, "target": t_id})
            # only first-seen topics are queued, so each topic is expanded at most once
            if t_haschild == 1:
                pending_ids.append(t_id)
    return data_dict
from flask import Flask, session, request
from flask import render_template, flash, redirect, url_for, jsonify

# flask extensions
from flask_bootstrap import Bootstrap
from flask_wtf import FlaskForm
from wtforms import StringField, PasswordField, SubmitField
from wtforms.validators import Length, Email

# application
app = Flask(__name__)
# NOTE(review): the secret key is hard-coded; it signs the session cookie and
# the CSRF tokens, so a real deployment should load it from configuration.
app.config["SECRET_KEY"] = "hard to guess string"

# bootstrap extension
bootstrap = Bootstrap(app=app)

# global data, loaded once when the module is imported
zhihu_all_topics = GetData_zhihu.get_all_topics()
# cache of search results: search key -> response dict
zhihu_all_topics_key = {}
# graph of the default topic, built once at start-up
zhihu_init_topics = GetData_zhihu.get_topic_data(topic_id="19559424", topic_name="数据分析")


# form class
class UserForm(FlaskForm):
    """Login form: an e-mail address as user name plus a password of at least 6 characters."""

    name = StringField("name", validators=[Email(message="邮箱格式不正确!")])
    password = PasswordField("password", validators=[Length(min=6, message="密码长度至少6位!")])
    submit = SubmitField("提 交")


@app.route("/", methods=["GET", "POST"])
def temp():
    """Redirect the site root to the index page."""
    return redirect(url_for("index"))


@app.route("/index/", methods=["GET", "POST"])
def index():
    """Render the main page and handle login, logout and page switching.

    POST: validate the login form. On success the user name is stored in the
    session; otherwise the first validation message is flashed.
    GET: the optional ``action`` query argument selects ``login_out`` (clear
    the user name) or a page type (``overview`` / ``zhihu_topics``). Each
    handled action redirects back to this page without the query string.
    """
    user_form = UserForm()
    if request.method == "POST":
        if user_form.validate_on_submit():
            session["username"] = user_form.name.data
        else:
            errors = user_form.errors
            # Prefer the name error, then the password error. Validation can
            # also fail on another field only (e.g. the CSRF token), in which
            # case neither key exists -- fall back to the first error present
            # instead of raising KeyError.
            messages = errors.get("name") or errors.get("password") or next(iter(errors.values()), None)
            if messages:
                flash(messages[0])
    else:
        action = request.args.get("action")
        if action == "login_out":
            flash("您已成功退出系统!")
            session["username"] = None
            return redirect(url_for("index"))
        if action in ("overview", "zhihu_topics"):
            # the action name doubles as the page type stored in the session
            session["page_type"] = action
            return redirect(url_for("index"))
    return render_template(
        "index.html",
        name=session.get("username"),
        page_type=session.get("page_type", "overview"),
        form=user_form,
    )
# Upper bound on the number of cached search keys. The keys come straight from
# client input, so without a limit the cache could grow without bound.
_MAX_CACHED_TOPIC_KEYS = 1000


@app.route("/zhihu_get_topics_list/", methods=["post"])
def zhihu_get_topics_list():
    """Return, as JSON, the topics whose name contains the posted ``key``.

    The response has the shape ``{"success": 1, "data": [{"id", "name"}, ...]}``.
    Results are cached per key in ``zhihu_all_topics_key`` until the cache
    holds ``_MAX_CACHED_TOPIC_KEYS`` entries; later keys are still answered,
    just not cached. A missing or empty key yields an empty ``data`` list.
    """
    key = request.form.get("key")
    # NOTE(review): "success" is 1 even when nothing matches. The original
    # re-assigned 1 when matches were found, which suggests 0 was intended as
    # the initial value -- confirm against the front-end before changing it.
    result = {"success": 1, "data": []}
    if key:
        if key in zhihu_all_topics_key:
            result = zhihu_all_topics_key[key]
        else:
            result["data"] = [
                {"id": t_id, "name": t_name}
                for t_id, t_name in zhihu_all_topics
                if t_name and key in t_name
            ]
            if len(zhihu_all_topics_key) < _MAX_CACHED_TOPIC_KEYS:
                zhihu_all_topics_key[key] = result
                logging.debug("all_topics_key increase: %s", len(zhihu_all_topics_key))
    return jsonify(result)


@app.route("/zhihu_get_topics_data/", methods=["post"])
def zhihu_get_topics_data():
    """Return, as JSON, the force-graph data of the posted topic (``id`` and ``name``).

    The graph preloaded at start-up is reused when the posted id is the id it
    was built for; every other topic is queried on demand.
    """
    topic_id = request.form["id"]
    # Compare against the root node of the preloaded graph rather than a second
    # hard-coded id: the two literals had drifted apart, so the preloaded graph
    # was returned for a different topic and never for its own.
    if topic_id == zhihu_init_topics["nodes"][0]["id"]:
        result = zhihu_init_topics
    else:
        result = GetData_zhihu.get_topic_data(topic_id, request.form["name"])
    return jsonify(result)


@app.errorhandler(404)
def page_not_found(excep):
    """Render the error page for unknown URLs, keeping the 404 status code."""
    return render_template("error.html", error=excep, name=session.get("username")), 404


# main process
if __name__ == "__main__":
    logging.basicConfig(level=logging.DEBUG, format="%(asctime)s\t%(levelname)s\t%(message)s")
    logging.debug("app url_map: %s", app.url_map)

    app.run()
23 | */ 24 | .navbar-fixed-top { 25 | border: 0; 26 | } 27 | 28 | /* 29 | * Sidebar 30 | */ 31 | 32 | /* Hide for mobile, show later */ 33 | .sidebar { 34 | display: none; 35 | } 36 | @media (min-width: 768px) { 37 | .sidebar { 38 | position: fixed; 39 | top: 51px; 40 | bottom: 0; 41 | left: 0; 42 | z-index: 1000; 43 | display: block; 44 | padding: 20px; 45 | overflow-x: hidden; 46 | overflow-y: auto; /* Scrollable contents if viewport is shorter than content. */ 47 | background-color: #f5f5f5; 48 | border-right: 1px solid #eee; 49 | } 50 | } 51 | 52 | /* Sidebar navigation */ 53 | .nav-sidebar { 54 | margin-right: -21px; /* 20px padding + 1px border */ 55 | margin-bottom: 20px; 56 | margin-left: -20px; 57 | } 58 | .nav-sidebar > li > a { 59 | padding-right: 20px; 60 | padding-left: 20px; 61 | } 62 | .nav-sidebar > .active > a, 63 | .nav-sidebar > .active > a:hover, 64 | .nav-sidebar > .active > a:focus { 65 | color: #fff; 66 | background-color: #428bca; 67 | } 68 | 69 | 70 | /* 71 | * Main content 72 | */ 73 | 74 | .main { 75 | padding: 20px; 76 | } 77 | @media (min-width: 768px) { 78 | .main { 79 | padding-right: 40px; 80 | padding-left: 40px; 81 | } 82 | } 83 | .main .page-header { 84 | margin-top: 0; 85 | } 86 | 87 | 88 | /* 89 | * Placeholder dashboard ideas 90 | */ 91 | 92 | .placeholders { 93 | margin-bottom: 30px; 94 | text-align: center; 95 | } 96 | .placeholders h4 { 97 | margin-bottom: 0; 98 | } 99 | .placeholder { 100 | margin-bottom: 20px; 101 | } 102 | .placeholder img { 103 | display: inline-block; 104 | border-radius: 50%; 105 | } 106 | -------------------------------------------------------------------------------- /MyShow/static/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jackfrued/LearnPython/5477a83c8294243995460ff3bafe105f2a4e372f/MyShow/static/favicon.ico -------------------------------------------------------------------------------- 
/MyShow/static/js/bootstrap-typeahead.min.js: -------------------------------------------------------------------------------- 1 | (function(root,factory){"use strict";if(typeof module!=="undefined"&&module.exports){module.exports=factory(require("jquery"))}else if(typeof define==="function"&&define.amd){define(["jquery"],function($){return factory($)})}else{factory(root.jQuery)}})(this,function($){"use strict";var Typeahead=function(element,options){this.$element=$(element);this.options=$.extend({},$.fn.typeahead.defaults,options);this.matcher=this.options.matcher||this.matcher;this.sorter=this.options.sorter||this.sorter;this.select=this.options.select||this.select;this.autoSelect=typeof this.options.autoSelect=="boolean"?this.options.autoSelect:true;this.highlighter=this.options.highlighter||this.highlighter;this.render=this.options.render||this.render;this.updater=this.options.updater||this.updater;this.displayText=this.options.displayText||this.displayText;this.source=this.options.source;this.delay=this.options.delay;this.$menu=$(this.options.menu);this.$appendTo=this.options.appendTo?$(this.options.appendTo):null;this.fitToElement=typeof this.options.fitToElement=="boolean"?this.options.fitToElement:false;this.shown=false;this.listen();this.showHintOnFocus=typeof this.options.showHintOnFocus=="boolean"||this.options.showHintOnFocus==="all"?this.options.showHintOnFocus:false;this.afterSelect=this.options.afterSelect;this.addItem=false;this.value=this.$element.val()||this.$element.text()};Typeahead.prototype={constructor:Typeahead,select:function(){var val=this.$menu.find(".active").data("value");this.$element.data("active",val);if(this.autoSelect||val){var newVal=this.updater(val);if(!newVal){newVal=""}this.$element.val(this.displayText(newVal)||newVal).text(this.displayText(newVal)||newVal).change();this.afterSelect(newVal)}return this.hide()},updater:function(item){return item},setSource:function(source){this.source=source},show:function(){var 
pos=$.extend({},this.$element.position(),{height:this.$element[0].offsetHeight});var scrollHeight=typeof this.options.scrollHeight=="function"?this.options.scrollHeight.call():this.options.scrollHeight;var element;if(this.shown){element=this.$menu}else if(this.$appendTo){element=this.$menu.appendTo(this.$appendTo);this.hasSameParent=this.$appendTo.is(this.$element.parent())}else{element=this.$menu.insertAfter(this.$element);this.hasSameParent=true}if(!this.hasSameParent){element.css("position","fixed");var offset=this.$element.offset();pos.top=offset.top;pos.left=offset.left}var dropup=$(element).parent().hasClass("dropup");var newTop=dropup?"auto":pos.top+pos.height+scrollHeight;var right=$(element).hasClass("dropdown-menu-right");var newLeft=right?"auto":pos.left;element.css({top:newTop,left:newLeft}).show();if(this.options.fitToElement===true){element.css("width",this.$element.outerWidth()+"px")}this.shown=true;return this},hide:function(){this.$menu.hide();this.shown=false;return this},lookup:function(query){var items;if(typeof query!="undefined"&&query!==null){this.query=query}else{this.query=this.$element.val()||this.$element.text()||""}if(this.query.length0){this.$element.data("active",items[0])}else{this.$element.data("active",null)}if(this.options.addItem){items.push(this.options.addItem)}if(this.options.items=="all"){return this.render(items).show()}else{return this.render(items.slice(0,this.options.items)).show()}},matcher:function(item){var it=this.displayText(item);return~it.toLowerCase().indexOf(this.query.toLowerCase())},sorter:function(items){var beginswith=[];var caseSensitive=[];var caseInsensitive=[];var item;while(item=items.shift()){var it=this.displayText(item);if(!it.toLowerCase().indexOf(this.query.toLowerCase()))beginswith.push(item);else if(~it.indexOf(this.query))caseSensitive.push(item);else caseInsensitive.push(item)}return beginswith.concat(caseSensitive,caseInsensitive)},highlighter:function(item){var html=$("
");var query=this.query;var i=item.toLowerCase().indexOf(query.toLowerCase());var len=query.length;var leftPart;var middlePart;var rightPart;var strong;if(len===0){return html.text(item).html()}while(i>-1){leftPart=item.substr(0,i);middlePart=item.substr(i,len);rightPart=item.substr(i+len);strong=$("").text(middlePart);html.append(document.createTextNode(leftPart)).append(strong);item=rightPart;i=item.toLowerCase().indexOf(query.toLowerCase())}return html.append(document.createTextNode(item)).html()},render:function(items){var that=this;var self=this;var activeFound=false;var data=[];var _category=that.options.separator;$.each(items,function(key,value){if(key>0&&value[_category]!==items[key-1][_category]){data.push({__type:"divider"})}if(value[_category]&&(key===0||value[_category]!==items[key-1][_category])){data.push({__type:"category",name:value[_category]})}data.push(value)});items=$(data).map(function(i,item){if((item.__type||false)=="category"){return $(that.options.headerHtml).text(item.name)[0]}if((item.__type||false)=="divider"){return $(that.options.headerDivider)[0]}var text=self.displayText(item);i=$(that.options.item).data("value",item);i.find("a").html(that.highlighter(text,item));if(text==self.$element.val()){i.addClass("active");self.$element.data("active",item);activeFound=true}return i[0]});if(this.autoSelect&&!activeFound){items.filter(":not(.dropdown-header)").first().addClass("active");this.$element.data("active",items.first().data("value"))}this.$menu.html(items);return this},displayText:function(item){return typeof item!=="undefined"&&typeof item.name!="undefined"&&item.name||item},next:function(event){var active=this.$menu.find(".active").removeClass("active");var next=active.next();if(!next.length){next=$(this.$menu.find("li")[0])}next.addClass("active")},prev:function(event){var active=this.$menu.find(".active").removeClass("active");var 
prev=active.prev();if(!prev.length){prev=this.$menu.find("li").last()}prev.addClass("active")},listen:function(){this.$element.on("focus",$.proxy(this.focus,this)).on("blur",$.proxy(this.blur,this)).on("keypress",$.proxy(this.keypress,this)).on("input",$.proxy(this.input,this)).on("keyup",$.proxy(this.keyup,this));if(this.eventSupported("keydown")){this.$element.on("keydown",$.proxy(this.keydown,this))}this.$menu.on("click",$.proxy(this.click,this)).on("mouseenter","li",$.proxy(this.mouseenter,this)).on("mouseleave","li",$.proxy(this.mouseleave,this)).on("mousedown",$.proxy(this.mousedown,this))},destroy:function(){this.$element.data("typeahead",null);this.$element.data("active",null);this.$element.off("focus").off("blur").off("keypress").off("input").off("keyup");if(this.eventSupported("keydown")){this.$element.off("keydown")}this.$menu.remove();this.destroyed=true},eventSupported:function(eventName){var isSupported=eventName in this.$element;if(!isSupported){this.$element.setAttribute(eventName,"return;");isSupported=typeof this.$element[eventName]==="function"}return isSupported},move:function(e){if(!this.shown)return;switch(e.keyCode){case 9:case 13:case 27:e.preventDefault();break;case 38:if(e.shiftKey)return;e.preventDefault();this.prev();break;case 40:if(e.shiftKey)return;e.preventDefault();this.next();break}},keydown:function(e){this.suppressKeyPressRepeat=~$.inArray(e.keyCode,[40,38,9,13,27]);if(!this.shown&&e.keyCode==40){this.lookup()}else{this.move(e)}},keypress:function(e){if(this.suppressKeyPressRepeat)return;this.move(e)},input:function(e){var currentValue=this.$element.val()||this.$element.text();if(this.value!==currentValue){this.value=currentValue;this.lookup()}},keyup:function(e){if(this.destroyed){return}switch(e.keyCode){case 40:case 38:case 16:case 17:case 18:break;case 9:case 13:if(!this.shown)return;this.select();break;case 
27:if(!this.shown)return;this.hide();break}},focus:function(e){if(!this.focused){this.focused=true;if(this.options.showHintOnFocus&&this.skipShowHintOnFocus!==true){if(this.options.showHintOnFocus==="all"){this.lookup("")}else{this.lookup()}}}if(this.skipShowHintOnFocus){this.skipShowHintOnFocus=false}},blur:function(e){if(!this.mousedover&&!this.mouseddown&&this.shown){this.hide();this.focused=false}else if(this.mouseddown){this.skipShowHintOnFocus=true;this.$element.focus();this.mouseddown=false}},click:function(e){e.preventDefault();this.skipShowHintOnFocus=true;this.select();this.$element.focus();this.hide()},mouseenter:function(e){this.mousedover=true;this.$menu.find(".active").removeClass("active");$(e.currentTarget).addClass("active")},mouseleave:function(e){this.mousedover=false;if(!this.focused&&this.shown)this.hide()},mousedown:function(e){this.mouseddown=true;this.$menu.one("mouseup",function(e){this.mouseddown=false}.bind(this))}};var old=$.fn.typeahead;$.fn.typeahead=function(option){var arg=arguments;if(typeof option=="string"&&option=="getActive"){return this.data("active")}return this.each(function(){var $this=$(this);var data=$this.data("typeahead");var options=typeof option=="object"&&option;if(!data)$this.data("typeahead",data=new Typeahead(this,options));if(typeof option=="string"&&data[option]){if(arg.length>1){data[option].apply(data,Array.prototype.slice.call(arg,1))}else{data[option]()}}})};$.fn.typeahead.defaults={source:[],items:8,menu:'',item:'
  • ',minLength:1,scrollHeight:0,autoSelect:true,afterSelect:$.noop,addItem:false,delay:0,separator:"category",headerHtml:'',headerDivider:''};$.fn.typeahead.Constructor=Typeahead;$.fn.typeahead.noConflict=function(){$.fn.typeahead=old;return this};$(document).on("focus.typeahead.data-api",'[data-provide="typeahead"]',function(e){var $this=$(this);if($this.data("typeahead"))return;$this.typeahead($this.data())})}); -------------------------------------------------------------------------------- /MyShow/static/js/bootstrap.min.js: -------------------------------------------------------------------------------- 1 | /*! 2 | * Bootstrap v3.3.7 (http://getbootstrap.com) 3 | * Copyright 2011-2016 Twitter, Inc. 4 | * Licensed under the MIT license 5 | */ 6 | if("undefined"==typeof jQuery)throw new Error("Bootstrap's JavaScript requires jQuery");+function(a){"use strict";var b=a.fn.jquery.split(" ")[0].split(".");if(b[0]<2&&b[1]<9||1==b[0]&&9==b[1]&&b[2]<1||b[0]>3)throw new Error("Bootstrap's JavaScript requires jQuery version 1.9.1 or higher, but lower than version 4")}(jQuery),+function(a){"use strict";function b(){var a=document.createElement("bootstrap"),b={WebkitTransition:"webkitTransitionEnd",MozTransition:"transitionend",OTransition:"oTransitionEnd otransitionend",transition:"transitionend"};for(var c in b)if(void 0!==a.style[c])return{end:b[c]};return!1}a.fn.emulateTransitionEnd=function(b){var c=!1,d=this;a(this).one("bsTransitionEnd",function(){c=!0});var e=function(){c||a(d).trigger(a.support.transition.end)};return setTimeout(e,b),this},a(function(){a.support.transition=b(),a.support.transition&&(a.event.special.bsTransitionEnd={bindType:a.support.transition.end,delegateType:a.support.transition.end,handle:function(b){if(a(b.target).is(this))return b.handleObj.handler.apply(this,arguments)}})})}(jQuery),+function(a){"use strict";function b(b){return this.each(function(){var c=a(this),e=c.data("bs.alert");e||c.data("bs.alert",e=new d(this)),"string"==typeof 
b&&e[b].call(c)})}var c='[data-dismiss="alert"]',d=function(b){a(b).on("click",c,this.close)};d.VERSION="3.3.7",d.TRANSITION_DURATION=150,d.prototype.close=function(b){function c(){g.detach().trigger("closed.bs.alert").remove()}var e=a(this),f=e.attr("data-target");f||(f=e.attr("href"),f=f&&f.replace(/.*(?=#[^\s]*$)/,""));var g=a("#"===f?[]:f);b&&b.preventDefault(),g.length||(g=e.closest(".alert")),g.trigger(b=a.Event("close.bs.alert")),b.isDefaultPrevented()||(g.removeClass("in"),a.support.transition&&g.hasClass("fade")?g.one("bsTransitionEnd",c).emulateTransitionEnd(d.TRANSITION_DURATION):c())};var e=a.fn.alert;a.fn.alert=b,a.fn.alert.Constructor=d,a.fn.alert.noConflict=function(){return a.fn.alert=e,this},a(document).on("click.bs.alert.data-api",c,d.prototype.close)}(jQuery),+function(a){"use strict";function b(b){return this.each(function(){var d=a(this),e=d.data("bs.button"),f="object"==typeof b&&b;e||d.data("bs.button",e=new c(this,f)),"toggle"==b?e.toggle():b&&e.setState(b)})}var c=function(b,d){this.$element=a(b),this.options=a.extend({},c.DEFAULTS,d),this.isLoading=!1};c.VERSION="3.3.7",c.DEFAULTS={loadingText:"loading..."},c.prototype.setState=function(b){var c="disabled",d=this.$element,e=d.is("input")?"val":"html",f=d.data();b+="Text",null==f.resetText&&d.data("resetText",d[e]()),setTimeout(a.proxy(function(){d[e](null==f[b]?this.options[b]:f[b]),"loadingText"==b?(this.isLoading=!0,d.addClass(c).attr(c,c).prop(c,!0)):this.isLoading&&(this.isLoading=!1,d.removeClass(c).removeAttr(c).prop(c,!1))},this),0)},c.prototype.toggle=function(){var a=!0,b=this.$element.closest('[data-toggle="buttons"]');if(b.length){var 
c=this.$element.find("input");"radio"==c.prop("type")?(c.prop("checked")&&(a=!1),b.find(".active").removeClass("active"),this.$element.addClass("active")):"checkbox"==c.prop("type")&&(c.prop("checked")!==this.$element.hasClass("active")&&(a=!1),this.$element.toggleClass("active")),c.prop("checked",this.$element.hasClass("active")),a&&c.trigger("change")}else this.$element.attr("aria-pressed",!this.$element.hasClass("active")),this.$element.toggleClass("active")};var d=a.fn.button;a.fn.button=b,a.fn.button.Constructor=c,a.fn.button.noConflict=function(){return a.fn.button=d,this},a(document).on("click.bs.button.data-api",'[data-toggle^="button"]',function(c){var d=a(c.target).closest(".btn");b.call(d,"toggle"),a(c.target).is('input[type="radio"], input[type="checkbox"]')||(c.preventDefault(),d.is("input,button")?d.trigger("focus"):d.find("input:visible,button:visible").first().trigger("focus"))}).on("focus.bs.button.data-api blur.bs.button.data-api",'[data-toggle^="button"]',function(b){a(b.target).closest(".btn").toggleClass("focus",/^focus(in)?$/.test(b.type))})}(jQuery),+function(a){"use strict";function b(b){return this.each(function(){var d=a(this),e=d.data("bs.carousel"),f=a.extend({},c.DEFAULTS,d.data(),"object"==typeof b&&b),g="string"==typeof b?b:f.slide;e||d.data("bs.carousel",e=new c(this,f)),"number"==typeof b?e.to(b):g?e[g]():f.interval&&e.pause().cycle()})}var c=function(b,c){this.$element=a(b),this.$indicators=this.$element.find(".carousel-indicators"),this.options=c,this.paused=null,this.sliding=null,this.interval=null,this.$active=null,this.$items=null,this.options.keyboard&&this.$element.on("keydown.bs.carousel",a.proxy(this.keydown,this)),"hover"==this.options.pause&&!("ontouchstart"in 
document.documentElement)&&this.$element.on("mouseenter.bs.carousel",a.proxy(this.pause,this)).on("mouseleave.bs.carousel",a.proxy(this.cycle,this))};c.VERSION="3.3.7",c.TRANSITION_DURATION=600,c.DEFAULTS={interval:5e3,pause:"hover",wrap:!0,keyboard:!0},c.prototype.keydown=function(a){if(!/input|textarea/i.test(a.target.tagName)){switch(a.which){case 37:this.prev();break;case 39:this.next();break;default:return}a.preventDefault()}},c.prototype.cycle=function(b){return b||(this.paused=!1),this.interval&&clearInterval(this.interval),this.options.interval&&!this.paused&&(this.interval=setInterval(a.proxy(this.next,this),this.options.interval)),this},c.prototype.getItemIndex=function(a){return this.$items=a.parent().children(".item"),this.$items.index(a||this.$active)},c.prototype.getItemForDirection=function(a,b){var c=this.getItemIndex(b),d="prev"==a&&0===c||"next"==a&&c==this.$items.length-1;if(d&&!this.options.wrap)return b;var e="prev"==a?-1:1,f=(c+e)%this.$items.length;return this.$items.eq(f)},c.prototype.to=function(a){var b=this,c=this.getItemIndex(this.$active=this.$element.find(".item.active"));if(!(a>this.$items.length-1||a<0))return this.sliding?this.$element.one("slid.bs.carousel",function(){b.to(a)}):c==a?this.pause().cycle():this.slide(a>c?"next":"prev",this.$items.eq(a))},c.prototype.pause=function(b){return b||(this.paused=!0),this.$element.find(".next, .prev").length&&a.support.transition&&(this.$element.trigger(a.support.transition.end),this.cycle(!0)),this.interval=clearInterval(this.interval),this},c.prototype.next=function(){if(!this.sliding)return this.slide("next")},c.prototype.prev=function(){if(!this.sliding)return this.slide("prev")},c.prototype.slide=function(b,d){var e=this.$element.find(".item.active"),f=d||this.getItemForDirection(b,e),g=this.interval,h="next"==b?"left":"right",i=this;if(f.hasClass("active"))return this.sliding=!1;var 
j=f[0],k=a.Event("slide.bs.carousel",{relatedTarget:j,direction:h});if(this.$element.trigger(k),!k.isDefaultPrevented()){if(this.sliding=!0,g&&this.pause(),this.$indicators.length){this.$indicators.find(".active").removeClass("active");var l=a(this.$indicators.children()[this.getItemIndex(f)]);l&&l.addClass("active")}var m=a.Event("slid.bs.carousel",{relatedTarget:j,direction:h});return a.support.transition&&this.$element.hasClass("slide")?(f.addClass(b),f[0].offsetWidth,e.addClass(h),f.addClass(h),e.one("bsTransitionEnd",function(){f.removeClass([b,h].join(" ")).addClass("active"),e.removeClass(["active",h].join(" ")),i.sliding=!1,setTimeout(function(){i.$element.trigger(m)},0)}).emulateTransitionEnd(c.TRANSITION_DURATION)):(e.removeClass("active"),f.addClass("active"),this.sliding=!1,this.$element.trigger(m)),g&&this.cycle(),this}};var d=a.fn.carousel;a.fn.carousel=b,a.fn.carousel.Constructor=c,a.fn.carousel.noConflict=function(){return a.fn.carousel=d,this};var e=function(c){var d,e=a(this),f=a(e.attr("data-target")||(d=e.attr("href"))&&d.replace(/.*(?=#[^\s]+$)/,""));if(f.hasClass("carousel")){var g=a.extend({},f.data(),e.data()),h=e.attr("data-slide-to");h&&(g.interval=!1),b.call(f,g),h&&f.data("bs.carousel").to(h),c.preventDefault()}};a(document).on("click.bs.carousel.data-api","[data-slide]",e).on("click.bs.carousel.data-api","[data-slide-to]",e),a(window).on("load",function(){a('[data-ride="carousel"]').each(function(){var c=a(this);b.call(c,c.data())})})}(jQuery),+function(a){"use strict";function b(b){var c,d=b.attr("data-target")||(c=b.attr("href"))&&c.replace(/.*(?=#[^\s]+$)/,"");return a(d)}function c(b){return this.each(function(){var c=a(this),e=c.data("bs.collapse"),f=a.extend({},d.DEFAULTS,c.data(),"object"==typeof b&&b);!e&&f.toggle&&/show|hide/.test(b)&&(f.toggle=!1),e||c.data("bs.collapse",e=new d(this,f)),"string"==typeof b&&e[b]()})}var 
d=function(b,c){this.$element=a(b),this.options=a.extend({},d.DEFAULTS,c),this.$trigger=a('[data-toggle="collapse"][href="#'+b.id+'"],[data-toggle="collapse"][data-target="#'+b.id+'"]'),this.transitioning=null,this.options.parent?this.$parent=this.getParent():this.addAriaAndCollapsedClass(this.$element,this.$trigger),this.options.toggle&&this.toggle()};d.VERSION="3.3.7",d.TRANSITION_DURATION=350,d.DEFAULTS={toggle:!0},d.prototype.dimension=function(){var a=this.$element.hasClass("width");return a?"width":"height"},d.prototype.show=function(){if(!this.transitioning&&!this.$element.hasClass("in")){var b,e=this.$parent&&this.$parent.children(".panel").children(".in, .collapsing");if(!(e&&e.length&&(b=e.data("bs.collapse"),b&&b.transitioning))){var f=a.Event("show.bs.collapse");if(this.$element.trigger(f),!f.isDefaultPrevented()){e&&e.length&&(c.call(e,"hide"),b||e.data("bs.collapse",null));var g=this.dimension();this.$element.removeClass("collapse").addClass("collapsing")[g](0).attr("aria-expanded",!0),this.$trigger.removeClass("collapsed").attr("aria-expanded",!0),this.transitioning=1;var h=function(){this.$element.removeClass("collapsing").addClass("collapse in")[g](""),this.transitioning=0,this.$element.trigger("shown.bs.collapse")};if(!a.support.transition)return h.call(this);var i=a.camelCase(["scroll",g].join("-"));this.$element.one("bsTransitionEnd",a.proxy(h,this)).emulateTransitionEnd(d.TRANSITION_DURATION)[g](this.$element[0][i])}}}},d.prototype.hide=function(){if(!this.transitioning&&this.$element.hasClass("in")){var b=a.Event("hide.bs.collapse");if(this.$element.trigger(b),!b.isDefaultPrevented()){var c=this.dimension();this.$element[c](this.$element[c]())[0].offsetHeight,this.$element.addClass("collapsing").removeClass("collapse in").attr("aria-expanded",!1),this.$trigger.addClass("collapsed").attr("aria-expanded",!1),this.transitioning=1;var 
e=function(){this.transitioning=0,this.$element.removeClass("collapsing").addClass("collapse").trigger("hidden.bs.collapse")};return a.support.transition?void this.$element[c](0).one("bsTransitionEnd",a.proxy(e,this)).emulateTransitionEnd(d.TRANSITION_DURATION):e.call(this)}}},d.prototype.toggle=function(){this[this.$element.hasClass("in")?"hide":"show"]()},d.prototype.getParent=function(){return a(this.options.parent).find('[data-toggle="collapse"][data-parent="'+this.options.parent+'"]').each(a.proxy(function(c,d){var e=a(d);this.addAriaAndCollapsedClass(b(e),e)},this)).end()},d.prototype.addAriaAndCollapsedClass=function(a,b){var c=a.hasClass("in");a.attr("aria-expanded",c),b.toggleClass("collapsed",!c).attr("aria-expanded",c)};var e=a.fn.collapse;a.fn.collapse=c,a.fn.collapse.Constructor=d,a.fn.collapse.noConflict=function(){return a.fn.collapse=e,this},a(document).on("click.bs.collapse.data-api",'[data-toggle="collapse"]',function(d){var e=a(this);e.attr("data-target")||d.preventDefault();var f=b(e),g=f.data("bs.collapse"),h=g?"toggle":e.data();c.call(f,h)})}(jQuery),+function(a){"use strict";function b(b){var c=b.attr("data-target");c||(c=b.attr("href"),c=c&&/#[A-Za-z]/.test(c)&&c.replace(/.*(?=#[^\s]*$)/,""));var d=c&&a(c);return d&&d.length?d:b.parent()}function c(c){c&&3===c.which||(a(e).remove(),a(f).each(function(){var d=a(this),e=b(d),f={relatedTarget:this};e.hasClass("open")&&(c&&"click"==c.type&&/input|textarea/i.test(c.target.tagName)&&a.contains(e[0],c.target)||(e.trigger(c=a.Event("hide.bs.dropdown",f)),c.isDefaultPrevented()||(d.attr("aria-expanded","false"),e.removeClass("open").trigger(a.Event("hidden.bs.dropdown",f)))))}))}function d(b){return this.each(function(){var c=a(this),d=c.data("bs.dropdown");d||c.data("bs.dropdown",d=new g(this)),"string"==typeof b&&d[b].call(c)})}var e=".dropdown-backdrop",f='[data-toggle="dropdown"]',g=function(b){a(b).on("click.bs.dropdown",this.toggle)};g.VERSION="3.3.7",g.prototype.toggle=function(d){var 
e=a(this);if(!e.is(".disabled, :disabled")){var f=b(e),g=f.hasClass("open");if(c(),!g){"ontouchstart"in document.documentElement&&!f.closest(".navbar-nav").length&&a(document.createElement("div")).addClass("dropdown-backdrop").insertAfter(a(this)).on("click",c);var h={relatedTarget:this};if(f.trigger(d=a.Event("show.bs.dropdown",h)),d.isDefaultPrevented())return;e.trigger("focus").attr("aria-expanded","true"),f.toggleClass("open").trigger(a.Event("shown.bs.dropdown",h))}return!1}},g.prototype.keydown=function(c){if(/(38|40|27|32)/.test(c.which)&&!/input|textarea/i.test(c.target.tagName)){var d=a(this);if(c.preventDefault(),c.stopPropagation(),!d.is(".disabled, :disabled")){var e=b(d),g=e.hasClass("open");if(!g&&27!=c.which||g&&27==c.which)return 27==c.which&&e.find(f).trigger("focus"),d.trigger("click");var h=" li:not(.disabled):visible a",i=e.find(".dropdown-menu"+h);if(i.length){var j=i.index(c.target);38==c.which&&j>0&&j--,40==c.which&&jdocument.documentElement.clientHeight;this.$element.css({paddingLeft:!this.bodyIsOverflowing&&a?this.scrollbarWidth:"",paddingRight:this.bodyIsOverflowing&&!a?this.scrollbarWidth:""})},c.prototype.resetAdjustments=function(){this.$element.css({paddingLeft:"",paddingRight:""})},c.prototype.checkScrollbar=function(){var a=window.innerWidth;if(!a){var b=document.documentElement.getBoundingClientRect();a=b.right-Math.abs(b.left)}this.bodyIsOverflowing=document.body.clientWidth
    ',trigger:"hover focus",title:"",delay:0,html:!1,container:!1,viewport:{selector:"body",padding:0}},c.prototype.init=function(b,c,d){if(this.enabled=!0,this.type=b,this.$element=a(c),this.options=this.getOptions(d),this.$viewport=this.options.viewport&&a(a.isFunction(this.options.viewport)?this.options.viewport.call(this,this.$element):this.options.viewport.selector||this.options.viewport),this.inState={click:!1,hover:!1,focus:!1},this.$element[0]instanceof document.constructor&&!this.options.selector)throw new Error("`selector` option must be specified when initializing "+this.type+" on the window.document object!");for(var e=this.options.trigger.split(" "),f=e.length;f--;){var g=e[f];if("click"==g)this.$element.on("click."+this.type,this.options.selector,a.proxy(this.toggle,this));else if("manual"!=g){var h="hover"==g?"mouseenter":"focusin",i="hover"==g?"mouseleave":"focusout";this.$element.on(h+"."+this.type,this.options.selector,a.proxy(this.enter,this)),this.$element.on(i+"."+this.type,this.options.selector,a.proxy(this.leave,this))}}this.options.selector?this._options=a.extend({},this.options,{trigger:"manual",selector:""}):this.fixTitle()},c.prototype.getDefaults=function(){return c.DEFAULTS},c.prototype.getOptions=function(b){return b=a.extend({},this.getDefaults(),this.$element.data(),b),b.delay&&"number"==typeof b.delay&&(b.delay={show:b.delay,hide:b.delay}),b},c.prototype.getDelegateOptions=function(){var b={},c=this.getDefaults();return this._options&&a.each(this._options,function(a,d){c[a]!=d&&(b[a]=d)}),b},c.prototype.enter=function(b){var c=b instanceof this.constructor?b:a(b.currentTarget).data("bs."+this.type);return c||(c=new this.constructor(b.currentTarget,this.getDelegateOptions()),a(b.currentTarget).data("bs."+this.type,c)),b instanceof 
a.Event&&(c.inState["focusin"==b.type?"focus":"hover"]=!0),c.tip().hasClass("in")||"in"==c.hoverState?void(c.hoverState="in"):(clearTimeout(c.timeout),c.hoverState="in",c.options.delay&&c.options.delay.show?void(c.timeout=setTimeout(function(){"in"==c.hoverState&&c.show()},c.options.delay.show)):c.show())},c.prototype.isInStateTrue=function(){for(var a in this.inState)if(this.inState[a])return!0;return!1},c.prototype.leave=function(b){var c=b instanceof this.constructor?b:a(b.currentTarget).data("bs."+this.type);if(c||(c=new this.constructor(b.currentTarget,this.getDelegateOptions()),a(b.currentTarget).data("bs."+this.type,c)),b instanceof a.Event&&(c.inState["focusout"==b.type?"focus":"hover"]=!1),!c.isInStateTrue())return clearTimeout(c.timeout),c.hoverState="out",c.options.delay&&c.options.delay.hide?void(c.timeout=setTimeout(function(){"out"==c.hoverState&&c.hide()},c.options.delay.hide)):c.hide()},c.prototype.show=function(){var b=a.Event("show.bs."+this.type);if(this.hasContent()&&this.enabled){this.$element.trigger(b);var d=a.contains(this.$element[0].ownerDocument.documentElement,this.$element[0]);if(b.isDefaultPrevented()||!d)return;var e=this,f=this.tip(),g=this.getUID(this.type);this.setContent(),f.attr("id",g),this.$element.attr("aria-describedby",g),this.options.animation&&f.addClass("fade");var h="function"==typeof this.options.placement?this.options.placement.call(this,f[0],this.$element[0]):this.options.placement,i=/\s?auto?\s?/i,j=i.test(h);j&&(h=h.replace(i,"")||"top"),f.detach().css({top:0,left:0,display:"block"}).addClass(h).data("bs."+this.type,this),this.options.container?f.appendTo(this.options.container):f.insertAfter(this.$element),this.$element.trigger("inserted.bs."+this.type);var k=this.getPosition(),l=f[0].offsetWidth,m=f[0].offsetHeight;if(j){var n=h,o=this.getPosition(this.$viewport);h="bottom"==h&&k.bottom+m>o.bottom?"top":"top"==h&&k.top-mo.width?"left":"left"==h&&k.left-lg.top+g.height&&(e.top=g.top+g.height-i)}else{var 
j=b.left-f,k=b.left+f+c;jg.right&&(e.left=g.left+g.width-k)}return e},c.prototype.getTitle=function(){var a,b=this.$element,c=this.options;return a=b.attr("data-original-title")||("function"==typeof c.title?c.title.call(b[0]):c.title)},c.prototype.getUID=function(a){do a+=~~(1e6*Math.random());while(document.getElementById(a));return a},c.prototype.tip=function(){if(!this.$tip&&(this.$tip=a(this.options.template),1!=this.$tip.length))throw new Error(this.type+" `template` option must consist of exactly 1 top-level element!");return this.$tip},c.prototype.arrow=function(){return this.$arrow=this.$arrow||this.tip().find(".tooltip-arrow")},c.prototype.enable=function(){this.enabled=!0},c.prototype.disable=function(){this.enabled=!1},c.prototype.toggleEnabled=function(){this.enabled=!this.enabled},c.prototype.toggle=function(b){var c=this;b&&(c=a(b.currentTarget).data("bs."+this.type),c||(c=new this.constructor(b.currentTarget,this.getDelegateOptions()),a(b.currentTarget).data("bs."+this.type,c))),b?(c.inState.click=!c.inState.click,c.isInStateTrue()?c.enter(c):c.leave(c)):c.tip().hasClass("in")?c.leave(c):c.enter(c)},c.prototype.destroy=function(){var a=this;clearTimeout(this.timeout),this.hide(function(){a.$element.off("."+a.type).removeData("bs."+a.type),a.$tip&&a.$tip.detach(),a.$tip=null,a.$arrow=null,a.$viewport=null,a.$element=null})};var d=a.fn.tooltip;a.fn.tooltip=b,a.fn.tooltip.Constructor=c,a.fn.tooltip.noConflict=function(){return a.fn.tooltip=d,this}}(jQuery),+function(a){"use strict";function b(b){return this.each(function(){var d=a(this),e=d.data("bs.popover"),f="object"==typeof b&&b;!e&&/destroy|hide/.test(b)||(e||d.data("bs.popover",e=new c(this,f)),"string"==typeof b&&e[b]())})}var c=function(a,b){this.init("popover",a,b)};if(!a.fn.tooltip)throw new Error("Popover requires 
tooltip.js");c.VERSION="3.3.7",c.DEFAULTS=a.extend({},a.fn.tooltip.Constructor.DEFAULTS,{placement:"right",trigger:"click",content:"",template:''}),c.prototype=a.extend({},a.fn.tooltip.Constructor.prototype),c.prototype.constructor=c,c.prototype.getDefaults=function(){return c.DEFAULTS},c.prototype.setContent=function(){var a=this.tip(),b=this.getTitle(),c=this.getContent();a.find(".popover-title")[this.options.html?"html":"text"](b),a.find(".popover-content").children().detach().end()[this.options.html?"string"==typeof c?"html":"append":"text"](c),a.removeClass("fade top bottom left right in"),a.find(".popover-title").html()||a.find(".popover-title").hide()},c.prototype.hasContent=function(){return this.getTitle()||this.getContent()},c.prototype.getContent=function(){var a=this.$element,b=this.options;return a.attr("data-content")||("function"==typeof b.content?b.content.call(a[0]):b.content)},c.prototype.arrow=function(){return this.$arrow=this.$arrow||this.tip().find(".arrow")};var d=a.fn.popover;a.fn.popover=b,a.fn.popover.Constructor=c,a.fn.popover.noConflict=function(){return a.fn.popover=d,this}}(jQuery),+function(a){"use strict";function b(c,d){this.$body=a(document.body),this.$scrollElement=a(a(c).is(document.body)?window:c),this.options=a.extend({},b.DEFAULTS,d),this.selector=(this.options.target||"")+" .nav li > a",this.offsets=[],this.targets=[],this.activeTarget=null,this.scrollHeight=0,this.$scrollElement.on("scroll.bs.scrollspy",a.proxy(this.process,this)),this.refresh(),this.process()}function c(c){return this.each(function(){var d=a(this),e=d.data("bs.scrollspy"),f="object"==typeof c&&c;e||d.data("bs.scrollspy",e=new b(this,f)),"string"==typeof c&&e[c]()})}b.VERSION="3.3.7",b.DEFAULTS={offset:10},b.prototype.getScrollHeight=function(){return this.$scrollElement[0].scrollHeight||Math.max(this.$body[0].scrollHeight,document.documentElement.scrollHeight)},b.prototype.refresh=function(){var 
b=this,c="offset",d=0;this.offsets=[],this.targets=[],this.scrollHeight=this.getScrollHeight(),a.isWindow(this.$scrollElement[0])||(c="position",d=this.$scrollElement.scrollTop()),this.$body.find(this.selector).map(function(){var b=a(this),e=b.data("target")||b.attr("href"),f=/^#./.test(e)&&a(e);return f&&f.length&&f.is(":visible")&&[[f[c]().top+d,e]]||null}).sort(function(a,b){return a[0]-b[0]}).each(function(){b.offsets.push(this[0]),b.targets.push(this[1])})},b.prototype.process=function(){var a,b=this.$scrollElement.scrollTop()+this.options.offset,c=this.getScrollHeight(),d=this.options.offset+c-this.$scrollElement.height(),e=this.offsets,f=this.targets,g=this.activeTarget;if(this.scrollHeight!=c&&this.refresh(),b>=d)return g!=(a=f[f.length-1])&&this.activate(a);if(g&&b=e[a]&&(void 0===e[a+1]||b .dropdown-menu > .active").removeClass("active").end().find('[data-toggle="tab"]').attr("aria-expanded",!1),b.addClass("active").find('[data-toggle="tab"]').attr("aria-expanded",!0),h?(b[0].offsetWidth,b.addClass("in")):b.removeClass("fade"),b.parent(".dropdown-menu").length&&b.closest("li.dropdown").addClass("active").end().find('[data-toggle="tab"]').attr("aria-expanded",!0),e&&e()}var g=d.find("> .active"),h=e&&a.support.transition&&(g.length&&g.hasClass("fade")||!!d.find("> .fade").length);g.length&&h?g.one("bsTransitionEnd",f).emulateTransitionEnd(c.TRANSITION_DURATION):f(),g.removeClass("in")};var d=a.fn.tab;a.fn.tab=b,a.fn.tab.Constructor=c,a.fn.tab.noConflict=function(){return a.fn.tab=d,this};var e=function(c){c.preventDefault(),b.call(a(this),"show")};a(document).on("click.bs.tab.data-api",'[data-toggle="tab"]',e).on("click.bs.tab.data-api",'[data-toggle="pill"]',e)}(jQuery),+function(a){"use strict";function b(b){return this.each(function(){var d=a(this),e=d.data("bs.affix"),f="object"==typeof b&&b;e||d.data("bs.affix",e=new c(this,f)),"string"==typeof b&&e[b]()})}var 
c=function(b,d){this.options=a.extend({},c.DEFAULTS,d),this.$target=a(this.options.target).on("scroll.bs.affix.data-api",a.proxy(this.checkPosition,this)).on("click.bs.affix.data-api",a.proxy(this.checkPositionWithEventLoop,this)),this.$element=a(b),this.affixed=null,this.unpin=null,this.pinnedOffset=null,this.checkPosition()};c.VERSION="3.3.7",c.RESET="affix affix-top affix-bottom",c.DEFAULTS={offset:0,target:window},c.prototype.getState=function(a,b,c,d){var e=this.$target.scrollTop(),f=this.$element.offset(),g=this.$target.height();if(null!=c&&"top"==this.affixed)return e=a-d&&"bottom"},c.prototype.getPinnedOffset=function(){if(this.pinnedOffset)return this.pinnedOffset;this.$element.removeClass(c.RESET).addClass("affix");var a=this.$target.scrollTop(),b=this.$element.offset();return this.pinnedOffset=b.top-a},c.prototype.checkPositionWithEventLoop=function(){setTimeout(a.proxy(this.checkPosition,this),1)},c.prototype.checkPosition=function(){if(this.$element.is(":visible")){var b=this.$element.height(),d=this.options.offset,e=d.top,f=d.bottom,g=Math.max(a(document).height(),a(document.body).height());"object"!=typeof d&&(f=e=d),"function"==typeof e&&(e=d.top(this.$element)),"function"==typeof f&&(f=d.bottom(this.$element));var h=this.getState(g,b,e,f);if(this.affixed!=h){null!=this.unpin&&this.$element.css("top","");var i="affix"+(h?"-"+h:""),j=a.Event(i+".bs.affix");if(this.$element.trigger(j),j.isDefaultPrevented())return;this.affixed=h,this.unpin="bottom"==h?this.getPinnedOffset():null,this.$element.removeClass(c.RESET).addClass(i).trigger(i.replace("affix","affixed")+".bs.affix")}"bottom"==h&&this.$element.offset({top:g-b-f})}};var d=a.fn.affix;a.fn.affix=b,a.fn.affix.Constructor=c,a.fn.affix.noConflict=function(){return a.fn.affix=d,this},a(window).on("load",function(){a('[data-spy="affix"]').each(function(){var 
c=a(this),d=c.data();d.offset=d.offset||{},null!=d.offsetBottom&&(d.offset.bottom=d.offsetBottom),null!=d.offsetTop&&(d.offset.top=d.offsetTop),b.call(c,d)})})}(jQuery); -------------------------------------------------------------------------------- /MyShow/templates/base.html: -------------------------------------------------------------------------------- 1 | {% extends "bootstrap/base.html" %} 2 | {% block title %}MyShow{% endblock %} 3 | 4 | {% block head %} 5 | {{ super() }} 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | {% endblock %} 20 | 21 | {% block navbar %} 22 | 53 | {% endblock %} 54 | 55 | {% block content %} 56 |
    57 | {% block page_content %} 58 | {% endblock %} 59 |
    60 | {% endblock %} 61 | -------------------------------------------------------------------------------- /MyShow/templates/error.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | {% block title %}数据展示 - Error{% endblock %} 3 | 4 | {% block page_content %} 5 | 8 | {% endblock %} 9 | -------------------------------------------------------------------------------- /MyShow/templates/index.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | {% import "bootstrap/wtf.html" as wtf %} 3 | 4 | {% block head %} 5 | {{ super() }} 6 | 7 | 123 | {% endblock %} 124 | 125 | {% block page_content %} 126 |
    127 | 137 | 138 |
    139 | {% for message in get_flashed_messages() %} 140 |
    141 | 142 | {{ message }} 143 |
    144 | {% endfor %} 145 | 146 | {% if page_type == "overview" %} 147 |

    数据展示,点击左边菜单栏即可查看各类数据展示页面

    148 | {% elif page_type == "zhihu_topics" %} 149 | 152 |
    153 | 154 | 157 | {% endif %} 158 |
    159 |
    160 | {% endblock %} 161 | 162 | {% block scripts %} 163 | 165 | {% endblock %} 166 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # LearnPython 2 | 以撸代码的形式学习Python, 具体说明在[知乎专栏-撸代码,学知识](https://zhuanlan.zhihu.com/pythoner) 3 | 4 | =================================================================================================== 5 | ### python_base.py: 千行代码入门Python 6 | 7 | ### python_visual.py: 15张图入门Matplotlib 8 | 9 | ### python_visual_animation.py: 使用Matplotlib画动态图实例 10 | 11 | ### python_spider.py: 一个很“水”的Python爬虫入门代码文件 12 | 13 | ### python_weibo.py: “史上最详细”的Python模拟登录新浪微博流程 14 | 15 | ### python_lda.py: 玩点高级的--带你入门Topic模型LDA(小改进+附源码) 16 | 17 | ### python_sqlalchemy.py: 作为一个Pythoner, 不会SQLAlchemy都不好意思跟同行打招呼! 18 | 19 | ### python_oneline.py: 几个小例子告诉你, 一行Python代码能干哪些事 20 | 21 | ### python_requests.py: Python中最好用的爬虫库Requests代码实例 22 | 23 | ### python_functional.py: Python进阶: 函数式编程实例(附代码) 24 | 25 | ### python_decorator.py: Python进阶: 通过实例详解装饰器(附代码) 26 | 27 | ### python_datetime.py: 你真的了解Python中的日期时间处理吗? 28 | 29 | ### python_metaclass.py: Python进阶: 一步步理解Python中的元类metaclass 30 | 31 | ### python_coroutine.py: Python进阶: 理解Python中的异步IO和协程(Coroutine), 并应用在爬虫中 32 | 33 | ### python_aiohttp.py: Python中最好用的异步爬虫库Aiohttp代码实例 34 | 35 | ### python_thread_multiprocess.py: Python进阶: 聊聊IO密集型任务、计算密集型任务,以及多线程、多进程 36 | 37 | ### python_version36.py: Python3.6正式版要来了, 你期待哪些新特性? 
38 | 39 | ### python_magic_methods.py: Python进阶: 实例讲解Python中的魔法函数(Magic Methods) 40 | 41 | ### python_restful_api.py: 利用Python和Flask快速开发RESTful API 42 | 43 | ### python_restful_api.py: RESTful API进阶: 连接数据库、添加参数、Token认证、返回代码说明等 44 | 45 | ### python_context.py: With语句和上下文管理器ContextManager 46 | 47 | ### python_flask.py: Flask相关说明 48 | 49 | ### MyShow: 玩点好玩的--知乎全部话题关系可视化 50 | 51 | ### python_markov_chain.py: 玩点好玩的--使用马尔可夫模型自动生成文章 52 | 53 | ### python_wechat.py: 玩点好玩的--自己写一个微信小助手 54 | 55 | ### python_csv.py: Python中CSV文件的简单读写 56 | 57 | ### python_numpy.py: 使用numpy进行矩阵操作 58 | =================================================================================================== 59 | 60 | ### 您可以fork该项目, 并在修改后提交Pull request, 看到后会尽量进行代码合并 61 | -------------------------------------------------------------------------------- /python_aiohttp.py: -------------------------------------------------------------------------------- 1 | # _*_ coding: utf-8 _*_ 2 | 3 | """ 4 | python_aiohttp.py by xianhu 5 | """ 6 | 7 | import asyncio 8 | import aiohttp 9 | 10 | 11 | # 简单实例 12 | async def aiohttp_test01(url): 13 | async with aiohttp.ClientSession() as session: 14 | async with session.get(url) as resp: 15 | print(resp.status) 16 | print(await resp.text()) 17 | 18 | loop = asyncio.get_event_loop() 19 | tasks = [aiohttp_test01("https://api.github.com/events")] 20 | loop.run_until_complete(asyncio.wait(tasks)) 21 | loop.close() 22 | 23 | # 其他Http方法 24 | # session.post('http://httpbin.org/post', data=b'data') 25 | # session.put('http://httpbin.org/put', data=b'data') 26 | # session.delete('http://httpbin.org/delete') 27 | # session.head('http://httpbin.org/get') 28 | # session.options('http://httpbin.org/get') 29 | # session.patch('http://httpbin.org/patch', data=b'data') 30 | 31 | # 自定义Headers 32 | # payload = {'some': 'data'} 33 | # headers = {'content-type': 'application/json'} 34 | # await session.post(url, data=json.dumps(payload), headers=headers) 35 | 36 | # 自定义Cookie 37 | # cookies = 
{'cookies_are': 'working'} 38 | # async with ClientSession(cookies=cookies) as session: 39 | # 访问Cookie: session.cookie_jar 40 | 41 | # 在URLs中传递参数 42 | # 1. params = {'key1': 'value1', 'key2': 'value2'} 43 | # 2. params = [('key', 'value1'), ('key', 'value2')] 44 | # async with session.get('http://httpbin.org/get', params=params) as resp: 45 | # assert resp.url == 'http://httpbin.org/get?key2=value2&key1=value1' 46 | 47 | # 发送数据 48 | # payload = {'key1': 'value1', 'key2': 'value2'} 49 | # async with session.post('http://httpbin.org/post', data=payload) as resp: 50 | # async with session.post(url, data=json.dumps(payload)) as resp: 51 | # print(await resp.text()) 52 | 53 | # 发送文件(1) 54 | # files = {'file': open('report.xls', 'rb')} 55 | # await session.post(url, data=files) 56 | 57 | # 发送文件(2) 58 | # data = FormData() 59 | # data.add_field('file', 60 | # open('report.xls', 'rb'), 61 | # filename='report.xls', 62 | # content_type='application/vnd.ms-excel') 63 | # await session.post(url, data=data) 64 | 65 | # 超时设置 66 | # async with session.get('https://github.com', timeout=60) as r: 67 | 68 | # 代理支持 69 | # async with aiohttp.ClientSession() as session: 70 | # async with session.get("http://python.org", proxy="http://some.proxy.com") as resp: 71 | # print(resp.status) 72 | 73 | # async with aiohttp.ClientSession() as session: 74 | # proxy_auth = aiohttp.BasicAuth('user', 'pass') 75 | # async with session.get("http://python.org", proxy="http://some.proxy.com", proxy_auth=proxy_auth) as resp: 76 | # print(resp.status) 77 | # session.get("http://python.org", proxy="http://user:pass@some.proxy.com") 78 | 79 | # 返回的内容 80 | # async with session.get('https://api.github.com/events') as resp: 81 | # print(await resp.text()) 82 | # print(await resp.text(encoding='gbk')) 83 | # print(await resp.read()) 84 | # print(await resp.json()) 85 | 86 | # 返回内容较大 87 | # with open(filename, 'wb') as fd: 88 | # while True: 89 | # chunk = await resp.content.read(chunk_size) 90 | # if not 
chunk: 91 | # break 92 | # fd.write(chunk) 93 | 94 | # 返回的其他变量 95 | # async with session.get('http://httpbin.org/get') as resp: 96 | # print(resp.status) # 状态码 97 | # print(resp.headers) # Headers 98 | # print(resp.raw_headers) # 原始Headers 99 | # print(resp.cookies) # 返回的Cookie 100 | 101 | # 访问历史History 102 | # resp = await session.get('http://example.com/some/redirect/') 103 | # resp: 104 | # resp.history: (,) 105 | 106 | # 释放返回的Response 107 | # 1. async with session.get(url) as resp: pass 108 | # 2. await resp.release() 109 | 110 | # 连接器: Connectors 111 | # conn = aiohttp.TCPConnector() 112 | # session = aiohttp.ClientSession(connector=conn) 113 | 114 | # 限制连接池大小: 115 | # conn = aiohttp.TCPConnector(limit=30) 116 | # conn = aiohttp.TCPConnector(limit=None) 117 | -------------------------------------------------------------------------------- /python_context.py: -------------------------------------------------------------------------------- 1 | # _*_ coding: utf-8 _*_ 2 | 3 | """ 4 | python_context.py by xianhu 5 | """ 6 | 7 | import contextlib 8 | 9 | 10 | # 自定义打开文件操作 11 | class MyOpen(object): 12 | 13 | def __init__(self, file_name): 14 | """初始化方法""" 15 | self.file_name = file_name 16 | self.file_handler = None 17 | return 18 | 19 | def __enter__(self): 20 | """enter方法,返回file_handler""" 21 | print("enter:", self.file_name) 22 | self.file_handler = open(self.file_name, "r") 23 | return self.file_handler 24 | 25 | def __exit__(self, exc_type, exc_val, exc_tb): 26 | """exit方法,关闭文件并返回True""" 27 | print("exit:", exc_type, exc_val, exc_tb) 28 | if self.file_handler: 29 | self.file_handler.close() 30 | return True 31 | 32 | # 使用实例 33 | with MyOpen("python_base.py") as file_in: 34 | for line in file_in: 35 | print(line) 36 | raise ZeroDivisionError 37 | # 代码块中主动引发一个除零异常,但整个程序不会引发异常 38 | 39 | 40 | # 内置库contextlib的使用 41 | @contextlib.contextmanager 42 | def open_func(file_name): 43 | # __enter__方法 44 | print("open file:", file_name, "in __enter__") 45 | file_handler = 
open(file_name, "r") 46 | 47 | yield file_handler 48 | 49 | # __exit__方法 50 | print("close file:", file_name, "in __exit__") 51 | file_handler.close() 52 | return 53 | 54 | # 使用实例 55 | with open_func("python_base.py") as file_in: 56 | for line in file_in: 57 | print(line) 58 | break 59 | 60 | 61 | # 内置库contextlib的使用 62 | class MyOpen2(object): 63 | 64 | def __init__(self, file_name): 65 | """初始化方法""" 66 | self.file_handler = open(file_name, "r") 67 | return 68 | 69 | def close(self): 70 | """关闭文件,会被自动调用""" 71 | print("call close in MyOpen2") 72 | if self.file_handler: 73 | self.file_handler.close() 74 | return 75 | 76 | # 使用实例 77 | with contextlib.closing(MyOpen2("python_base.py")) as file_in: 78 | pass 79 | -------------------------------------------------------------------------------- /python_coroutine.py: -------------------------------------------------------------------------------- 1 | # _*_ coding: utf-8 _*_ 2 | 3 | """ 4 | python_coroutine.py by xianhu 5 | """ 6 | 7 | import asyncio 8 | import aiohttp 9 | import threading 10 | 11 | 12 | # 生产者、消费者例子 13 | def consumer(): # 定义消费者,由于有yield关键词,此消费者为一个生成器 14 | print("[Consumer] Init Consumer ......") 15 | r = "init ok" # 初始化返回结果,并在启动消费者时,返回给生产者 16 | while True: 17 | n = yield r # 消费者通过yield关键词接收生产者产生的消息,同时返回结果给生产者 18 | print("[Consumer] conusme n = %s, r = %s" % (n, r)) 19 | r = "consume %s OK" % n # 消费者消费结果,下个循环返回给生产者 20 | 21 | 22 | def produce(c): # 定义生产者,此时的 c 为一个生成器 23 | print("[Producer] Init Producer ......") 24 | r = c.send(None) # 启动消费者生成器,同时第一次接收返回结果 25 | print("[Producer] Start Consumer, return %s" % r) 26 | n = 0 27 | while n < 5: 28 | n += 1 29 | print("[Producer] While, Producing %s ......" 
% n) 30 | r = c.send(n) # 向消费者发送消息,同时准备接收结果。此时会切换到消费者执行 31 | print("[Producer] Consumer return: %s" % r) 32 | c.close() # 关闭消费者生成器 33 | print("[Producer] Close Producer ......") 34 | 35 | # produce(consumer()) 36 | 37 | 38 | # 异步IO例子:适配Python3.4,使用asyncio库 39 | @asyncio.coroutine 40 | def hello(index): # 通过装饰器asyncio.coroutine定义协程 41 | print('Hello world! index=%s, thread=%s' % (index, threading.currentThread())) 42 | yield from asyncio.sleep(1) # 模拟IO任务 43 | print('Hello again! index=%s, thread=%s' % (index, threading.currentThread()))@asyncio.coroutine 44 | 45 | loop = asyncio.get_event_loop() # 得到一个事件循环模型 46 | tasks = [hello(1), hello(2)] # 初始化任务列表 47 | loop.run_until_complete(asyncio.wait(tasks)) # 执行任务 48 | loop.close() # 关闭事件循环列表 49 | 50 | 51 | # 异步IO例子:适配Python3.5,使用async和await关键字 52 | async def hello1(index): # 通过关键字async定义协程 53 | print('Hello world! index=%s, thread=%s' % (index, threading.currentThread())) 54 | await asyncio.sleep(1) # 模拟IO任务 55 | print('Hello again! index=%s, thread=%s' % (index, threading.currentThread())) 56 | 57 | loop = asyncio.get_event_loop() # 得到一个事件循环模型 58 | tasks = [hello1(1), hello1(2)] # 初始化任务列表 59 | loop.run_until_complete(asyncio.wait(tasks)) # 执行任务 60 | loop.close() # 关闭事件循环列表 61 | 62 | 63 | # aiohttp 实例 64 | async def get(url): 65 | async with aiohttp.ClientSession() as session: 66 | async with session.get(url) as resp: 67 | print(url, resp.status) 68 | print(url, await resp.text()) 69 | 70 | loop = asyncio.get_event_loop() # 得到一个事件循环模型 71 | tasks = [ # 初始化任务列表 72 | get("http://zhushou.360.cn/detail/index/soft_id/3283370"), 73 | get("http://zhushou.360.cn/detail/index/soft_id/3264775"), 74 | get("http://zhushou.360.cn/detail/index/soft_id/705490") 75 | ] 76 | loop.run_until_complete(asyncio.wait(tasks)) # 执行任务 77 | loop.close() # 关闭事件循环列表 78 | -------------------------------------------------------------------------------- /python_csv.py: -------------------------------------------------------------------------------- 1 | 
# _*_ coding: utf-8 _*_ 2 | 3 | """ 4 | python_csv.py by xianhu 5 | """ 6 | 7 | import csv 8 | import datetime 9 | 10 | # 数据 11 | data = [ 12 | [1, "a,bc", 19.353, datetime.datetime(2001, 3, 17)], 13 | [2, "ei,f", 13.287, datetime.datetime(2011, 4, 27)], 14 | [3, "q\"ij", 15.852, datetime.datetime(2003, 7, 14)], 15 | [4, "zh'n", 11.937, datetime.datetime(2012, 1, 9)], 16 | [5, "i\'op", 12.057, datetime.datetime(2009, 5, 18)], 17 | ] 18 | 19 | # 写文件 20 | with open("test.csv", "w") as file: 21 | writer = csv.writer(file, dialect="excel") 22 | # writer.writerows(data) 23 | for item in data: 24 | writer.writerow(item) 25 | 26 | # 读文件 27 | with open("test.csv", "r") as file: 28 | reader = csv.reader(file, dialect="excel") 29 | for item in reader: 30 | print(item) 31 | 32 | # 读文件 33 | with open("test.csv", "r") as file: 34 | reader = csv.DictReader(file, fieldnames=["id", "name", "float", "datetime"], dialect="excel") 35 | data = [item for item in reader] 36 | print(data) 37 | 38 | # 写文件 39 | with open("test.csv", "w") as file: 40 | writer = csv.DictWriter(file, fieldnames=["id", "name", "float", "datetime"], dialect="excel") 41 | writer.writeheader() 42 | for item in data: 43 | writer.writerow(item) 44 | -------------------------------------------------------------------------------- /python_datetime.py: -------------------------------------------------------------------------------- 1 | # _*_ coding: utf-8 _*_ 2 | 3 | import time 4 | import calendar 5 | import datetime 6 | 7 | 8 | # time模块中的三种时间形式 9 | print("time stamp:", time.time()) # 时间戳 10 | print("local time:", time.localtime()) # struct_time类型的本地时间 11 | print("utc time:", time.gmtime()) # struct_time类型的utc时间 12 | 13 | # time模块中,三种时间形式之间的转换 14 | time_stamp = time.time() # 时间戳 15 | local_time = time.localtime(time_stamp) # 时间戳转struct_time类型的本地时间 16 | utc_time = time.gmtime(time_stamp) # 时间戳转struct_time类型的utc时间 17 | 18 | time_stamp_1 = time.mktime(local_time) # struct_time类型的本地时间转时间戳 19 | time_stamp_2 = 
calendar.timegm(utc_time) # struct_time类型的utc时间转时间戳 20 | print(time_stamp, time_stamp_1, time_stamp_2) 21 | 22 | 23 | # time模块中,三种时间形式和字符串之间的转换 24 | print(time.ctime(time_stamp)) # 时间戳转字符串(本地时间字符串) 25 | 26 | print(time.asctime(local_time)) # struct_time类型的本地时间转字符串 27 | print(time.asctime(utc_time)) # struct_time类型的utc时间转字符串 28 | 29 | print(time.strftime("%Y-%m-%d, %H:%M:%S, %w", local_time)) # struct_time类型的本地时间转字符串:自定义格式 30 | print(time.strftime("%Y-%m-%d, %H:%M:%S, %w", utc_time)) # struct_time类型的utc时间转字符串:自定义格式 31 | 32 | struct_time = time.strptime("2016-11-15, 15:32:12, 2", "%Y-%m-%d, %H:%M:%S, %w") # 字符串转struct_time类型 33 | 34 | 35 | # datetime模块中datetime类的用法 36 | a_datetime_local = datetime.datetime.now() # 获取datetime.datetime类型的本地时间 37 | a_datetime_utc = datetime.datetime.utcnow() # 获取datetime.datetime类型的utc时间 38 | 39 | print(a_datetime_local.strftime("%Y-%m-%d, %H:%M:%S, %w")) # datetime.datetime类型转字符串 40 | print(a_datetime_utc.strftime("%Y-%m-%d, %H:%M:%S, %w")) # datetime.datetime类型转字符串 41 | 42 | a_datetime = datetime.datetime.strptime("2016-11-15, 15:32:12, 2", "%Y-%m-%d, %H:%M:%S, %w") # 字符串转datetime.datetime格式 43 | 44 | 45 | # datetime.datetime类和时间戳、struct_time类型之间的转换 46 | time_stamp = a_datetime_local.timestamp() # datetime类型转时间戳 47 | print(time_stamp) 48 | 49 | a_datetime_local = datetime.datetime.fromtimestamp(time.time()) # 时间戳转datetime.datetime类型的本地时间 50 | a_datetime_utc = datetime.datetime.utcfromtimestamp(time.time()) # 时间戳转datetime.datetime类型的utc时间 51 | print(a_datetime_local, a_datetime_utc) 52 | 53 | print(a_datetime_local.timetuple()) # datetime类型转struct_time类型 54 | print(a_datetime_utc.utctimetuple()) # datetime类型转struct_time类型 55 | -------------------------------------------------------------------------------- /python_decorator.py: -------------------------------------------------------------------------------- 1 | # _*_ coding: utf-8 _*_ 2 | 3 | """ 4 | python_decorator.py by xianhu 5 | """ 6 | 7 | import functools 8 | 9 | 10 | # 
构建不带参数的装饰器 11 | def logging(func): 12 | @functools.wraps(func) 13 | def decorator(*args, **kwargs): 14 | print("%s called" % func.__name__) 15 | result = func(*args, **kwargs) 16 | print("%s end" % func.__name__) 17 | return result 18 | return decorator 19 | 20 | 21 | # 使用装饰器 22 | @logging 23 | def test01(a, b): 24 | print("in function test01, a=%s, b=%s" % (a, b)) 25 | return 1 26 | 27 | 28 | # 使用装饰器 29 | @logging 30 | def test02(a, b, c=1): 31 | print("in function test02, a=%s, b=%s, c=%s" % (a, b, c)) 32 | return 1 33 | 34 | 35 | # 构建带参数的装饰器 36 | def params_chack(*types, **kwtypes): 37 | def _outer(func): 38 | @functools.wraps(func) 39 | def _inner(*args, **kwargs): 40 | result = [isinstance(_param, _type) for _param, _type in zip(args, types)] 41 | assert all(result), "params_chack: invalid parameters" 42 | result = [isinstance(kwargs[_param], kwtypes[_param]) for _param in kwargs if _param in kwtypes] 43 | assert all(result), "params_chack: invalid parameters" 44 | return func(*args, **kwargs) 45 | return _inner 46 | return _outer 47 | 48 | 49 | # 使用装饰器 50 | @params_chack(int, (list, tuple)) 51 | def test03(a, b): 52 | print("in function test03, a=%s, b=%s" % (a, b)) 53 | return 1 54 | 55 | 56 | # 使用装饰器 57 | @params_chack(int, str, c=(int, str)) 58 | def test04(a, b, c): 59 | print("in function test04, a=%s, b=%s, c=%s" % (a, b, c)) 60 | return 1 61 | 62 | 63 | # 在类的成员方法中使用装饰器 64 | class ATest(object): 65 | @params_chack(object, int, str) 66 | def test(self, a, b): 67 | print("in function test of ATest, a=%s, b=%s" % (a, b)) 68 | return 1 69 | 70 | 71 | # 同时使用多个装饰器 72 | @logging 73 | @params_chack(int, str, (list, tuple)) 74 | def test05(a, b, c): 75 | print("in function test05, a=%s, b=%s, c=%s" % (a, b, c)) 76 | return 1 77 | 78 | 79 | # 构建不带参数的装饰器类 80 | class Decorator(object): 81 | 82 | def __init__(self, func): 83 | self.func = func 84 | return 85 | 86 | def __call__(self, *args, **kwargs): 87 | print("%s called" % self.func.__name__) 88 | result = 
self.func(*args, **kwargs) 89 | print("%s end" % self.func.__name__) 90 | return result 91 | 92 | 93 | # 使用装饰器 94 | @Decorator 95 | def test06(a, b, c): 96 | print("in function test06, a=%s, b=%s, c=%s" % (a, b, c)) 97 | return 1 98 | 99 | 100 | # 构建带参数的装饰器类 101 | class ParamCheck(object): 102 | 103 | def __init__(self, *types, **kwtypes): 104 | self.types = types 105 | self.kwtypes = kwtypes 106 | return 107 | 108 | def __call__(self, func): 109 | @functools.wraps(func) 110 | def _inner(*args, **kwargs): 111 | result = [isinstance(_param, _type) for _param, _type in zip(args, self.types)] 112 | assert all(result), "params_chack: invalid parameters" 113 | result = [isinstance(kwargs[_param], self.kwtypes[_param]) for _param in kwargs if _param in self.kwtypes] 114 | assert all(result), "params_chack: invalid parameters" 115 | return func(*args, **kwargs) 116 | return _inner 117 | 118 | 119 | # 使用装饰器 120 | @ParamCheck(int, str, (list, tuple)) 121 | def test07(a, b, c): 122 | print("in function test06, a=%s, b=%s, c=%s" % (a, b, c)) 123 | return 1 124 | 125 | 126 | # 装饰器实例: 函数缓存 127 | def funccache(func): 128 | cache = {} 129 | 130 | @functools.wraps(func) 131 | def _inner(*args): 132 | if args not in cache: 133 | cache[args] = func(*args) 134 | return cache[args] 135 | return _inner 136 | 137 | 138 | # 使用装饰器 139 | @funccache 140 | def test08(a, b, c): 141 | # 其他复杂或耗时计算 142 | return a + b + c 143 | 144 | 145 | # 使用Python自带的装饰器 @property 146 | class Person(object): 147 | 148 | def __init__(self): 149 | self._name = None 150 | return 151 | 152 | def get_name(self): 153 | print("get_name") 154 | return self._name 155 | 156 | def set_name(self, name): 157 | print("set_name") 158 | self._name = name 159 | return 160 | 161 | name = property(fget=get_name, fset=set_name, doc="person name") 162 | 163 | 164 | # 使用Python自带的装饰器 @property 165 | class People(object): 166 | 167 | def __init__(self): 168 | self._name = None 169 | self._age = None 170 | return 171 | 172 | @property 
173 | def name(self): 174 | return self._name 175 | 176 | @name.setter 177 | def name(self, name): 178 | self._name = name 179 | return 180 | 181 | @property 182 | def age(self): 183 | return self._age 184 | 185 | @age.setter 186 | def age(self, age): 187 | assert 0 < age < 120 188 | self._age = age 189 | return 190 | 191 | 192 | # 类静态方法和类方法 193 | class A(object): 194 | var = 1 195 | 196 | def func(self): 197 | print(self.var) 198 | return 199 | 200 | @staticmethod 201 | def static_func(): 202 | print(A.var) 203 | return 204 | 205 | @classmethod 206 | def class_func(cls): 207 | print(cls.var) 208 | cls().func() 209 | return 210 | -------------------------------------------------------------------------------- /python_flask.py: -------------------------------------------------------------------------------- 1 | # _*_ coding: utf-8 _*_ 2 | 3 | # Flask中的一些定义 4 | # ============================================================================================================================= 5 | # WSGI: Web服务器网关接口,是一种Web服务使用的协议。 6 | # 路由: 处理URL和函数之间关系的程序称为"路由"。 7 | # 视图函数: 类似于index()这样的,被app.route装饰器注册为路由的函数,或者通过app.add_url_rule()添加路由映射关系的函数,被称为视图函数。 8 | # app.route(): 路由装饰器,可以带参数,参数可以指定数据类型:int/float/path。path类似于字符串,但不将反斜线/当做分隔符。 9 | # ============================================================================================================================= 10 | 11 | # Flask上下文全局变量 12 | # ============================================================================================================================= 13 | # current_app: 程序上下文,当前激活程序的程序实例,所有线程公用一个该实例。 14 | # g: 程序上下文,处理请求时用作临时存储的对象,每次请求都会重设这个变量。 15 | # request: 请求上下文,请求对象,封装了客户端发出的 HTTP 请求中的内容,不同线程之间互不干扰。 16 | # session: 请求上下问,用户会话,用于存储请求之间需要“记住”的值的词典。 17 | # ============================================================================================================================= 18 | 19 | # Flask支持的4种钩子函数 20 | # 
============================================================================================================================= 21 | # before_first_request: 注册一个函数,在处理第一个请求之前运行。 22 | # before_request: 注册一个函数,在每次请求之前运行。 23 | # after_request: 注册一个函数,如果没有未处理的异常抛出,在每次请求之后运行。 24 | # teardown_request:注册一个函数,即使有未处理的异常抛出,也在每次请求之后运行。 25 | # ============================================================================================================================= 26 | 27 | # Jinja2模板使用 28 | # ============================================================================================================================= 29 | # 渲染模板: render_template("user.html", name=name) 30 | # (1) 变量: {{ name | capitalize }} 31 | # (2) 控制结构: 32 | # {% if user %} 33 | # Hello, {{ user }}! 34 | # {% else %} 35 | # Hello, Stranger! 36 | # {% endif %} 37 | # 38 | #
<ul> 39 | # {% for comment in comments %} 40 | # <li>{{ comment }}</li>{% endfor %} 41 | # </ul>
42 | # (3) 宏-类似于函数: 43 | # {% macro render_comment(comment) %} 44 | # <li>{{ comment }}</li> 45 | # {% endmacro %} 46 | # 47 | # <ul>
48 | # {% for comment in comments %} 49 | # {{ render_comment(comment) }} 50 | # {% endfor %} 51 | # </ul>
    52 | # ============================================================================================================================= 53 | 54 | # Jinja2变量过滤器 55 | # ============================================================================================================================= 56 | # safe: 渲染值时不转义 57 | # capitalize: 把值的首字母转换成大写,其他字母转换成小写 58 | # lower: 把值转换成小写形式 59 | # upper: 把值转换成大写形式 60 | # title: 把值中每个单词的首字母都转换成大写 61 | # trim: 把值的首尾空格去掉 62 | # striptags: 渲染之前把值中所有的 HTML 标签都删掉 63 | # ============================================================================================================================= 64 | 65 | # Flask-Bootstrap基模板中定义的块 66 | # ============================================================================================================================= 67 | # doc: 整个 HTML 文档 68 | # html_attribs: 标签的属性 69 | # html: 标签中的内容 70 | # head: 标签中的内容 71 | # title: 标签中的内容 72 | # metas: 一组 <meta> 标签 73 | # styles: 层叠样式表定义 74 | # body_attribs: <body> 标签的属性 75 | # body: <body> 标签中的内容 76 | # navbar: 用户定义的导航条 77 | # content: 用户定义的页面内容 78 | # scripts: 文档底部的 JavaScript 声明 79 | # ============================================================================================================================= 80 | 81 | # WTForms支持的HTML标准字段,注意添加app.config['SECRET_KEY'] = 'hard to guess string' 82 | # ============================================================================================================================= 83 | # StringField 文本字段 84 | # TextAreaField 多行文本字段 85 | # PasswordField 密码文本字段 86 | # HiddenField 隐藏文本字段 87 | # DateField 值为datatime.data格式的文本字段 88 | # DateTimeField 值为datatime.datatime格式的文本字段 89 | # DecimalField 值为decimal.Decimal格式的文本字段 90 | # IntegerField 值为整数的文本字段 91 | # FloatField 值为浮点数的文本字段 92 | # BooleanField 值为True或False的复选框 93 | # RadioField 一组单选框 94 | # SelectField 值唯一的下拉列表 95 | # SelectMultipleField 可选多个值得下拉列表 96 | # FileField 文件上传字段 97 | # SubmitField 表单提交按钮 98 | # FormField 把表单作为字段嵌入另一个表单 99 | # FieldList 一组指定类型的字段 100 | 
# =============================================================================================================================

# WTForms validators
# =============================================================================================================================
# Email                     validates an e-mail address
# EqualTo                   compares the values of two fields; typically used when a password must be typed twice
# IPAddress                 validates an IPv4 network address
# Length                    validates the length of the input string
# NumberRange               validates that the input value lies within a numeric range
# Optional                  skips the other validators when there is no input
# Required / DataRequired   makes sure the field contains data
# Regexp                    validates the input against a regular expression
# URL                       validates a URL
# AnyOf                     makes sure the input is one of a list of allowed values
# NoneOf                    makes sure the input is not in a list of values
# =============================================================================================================================

# uWSGI and nginx configuration
# =============================================================================================================================
# uwsgi -s /tmp/uwsgi.sock -w MyShow:app --chmod-socket=666
# server {
#     listen 80;
#     server_name wangluopachong.com;
#
#     charset utf-8;
#
#     location / {
#         include uwsgi_params;
#         uwsgi_pass unix:/tmp/uwsgi.sock;
#     }
# }
# =============================================================================================================================
# --------------------------------------------------------------------------------
# /python_functional.py:
# --------------------------------------------------------------------------------
# _*_ coding: utf-8 _*_

from fn import _
from operator import add
from functools import partial, reduce

# List comprehension: squares of 0..4
a_list = [n ** 2 for n in range(5)]
print(a_list)

# Dict comprehension: "n^2" -> n squared
a_dict = {"%d^2" % n: n ** 2 for n in range(5)}
print(a_dict)

# Generator expression: values are produced lazily, one per next() call
a_generator = (n ** 2 for n in range(5))
print(a_generator)
print(next(a_generator))
print(next(a_generator))

# The iter() and next() functions
a_list_generator = iter(a_list)
print(next(a_list_generator))
print(next(a_list_generator))
print(type(a_list), type(a_list_generator))

# Lambda expression
a_func = lambda x, y: x**y
print(a_func(2, 3))

# map()
print(map(abs, range(-4, 5)))
print(list(map(abs, range(-4, 5))))
print(list(map(lambda x: x**2, range(5))))
print(list(map(lambda x, y: x**y, range(1, 5), range(1, 5))))

# reduce()
print(reduce(lambda x, y: x+y, range(10)))
print(reduce(lambda x, y: x+y, range(10), 100))
print(reduce(lambda x, y: x+y, [[1, 2], [3, 4]], [0]))

# filter()
print(filter(None, range(-4, 5)))
print(list(filter(None, range(-4, 5))))
print(list(filter(lambda x: x > 0, range(-4, 5))))

# all() and any()
print(all([0, 1, 2]))
print(any([0, 1, 2]))

# enumerate()
for index, item in enumerate(range(5)):
    print("%d: %d" % (index, item))

# zip()
for a, b in zip([1, 2, 3], ["a", "b", "c"]):
    print(a, b)
a_dict = dict(zip([1, 2, 3], ["a", "b", "c"]))
print(a_dict)

# partial()
print(int("10010", base=2))
int_base_2 = partial(int, base=2)
print(int_base_2("10010"))

# operator.add
print(reduce(lambda x, y: x+y, range(10)))
print(reduce(add, range(10)))

# Using the fn library
add_func_1 = (_ + 2)
print(add_func_1(1))
add_func_2 = (_ + _ * _)
print(add_func_2(1, 2, 3))
# --------------------------------------------------------------------------------
# /python_lda.py:
# --------------------------------------------------------------------------------
# _*_ coding: utf-8 _*_

"""
python_lda.py by xianhu
"""

import os
import numpy
import logging
from collections import defaultdict

# Module-level constants
MAX_ITER_NUM = 10000    # maximum number of Gibbs iterations
VAR_NUM = 20            # window size of the perplexity variance used when the iteration count is "auto"


class BiDictionary(object):
    """
    Bidirectional dictionary: a value can be looked up by key and a key by value.
    """

    def __init__(self):
        """
        :key: create an empty bidirectional dictionary
        """
        self.dict = {}              # forward map: key -> value
        self.dict_reversed = {}     # reverse map: value -> key

    def __len__(self):
        """
        :key: number of key/value pairs
        """
        return len(self.dict)

    def __str__(self):
        """
        :key: one "key<TAB>value" line per pair
        """
        return "\n".join("%s\t%s" % (key, value) for key, value in self.dict.items())

    def clear(self):
        """
        :key: remove every pair
        """
        self.dict.clear()
        self.dict_reversed.clear()

    def add_key_value(self, key, value):
        """
        :key: add (or overwrite) one pair, keeping both maps exact inverses of each other
        """
        # Drop whatever the key or the value was paired with before; otherwise a
        # stale entry would survive in the opposite map.
        if key in self.dict:
            self.dict_reversed.pop(self.dict[key], None)
        if value in self.dict_reversed:
            self.dict.pop(self.dict_reversed[value], None)
        self.dict[key] = value
        self.dict_reversed[value] = key

    def remove_key_value(self, key, value):
        """
        :key: remove the pair stored under key; does nothing when key is absent
        """
        if key in self.dict:
            # Remove the value that is actually stored, so a mismatched ``value``
            # argument can neither raise KeyError nor leave an orphan behind.
            stored_value = self.dict.pop(key)
            self.dict_reversed.pop(stored_value, None)

    def get_value(self, key, default=None):
        """
        :key: value stored under key, or default when absent
        """
        return self.dict.get(key, default)

    def get_key(self, value, default=None):
        """
        :key: key that maps to value, or default when absent
        """
        return self.dict_reversed.get(value, default)

    def contains_key(self, key):
        """
        :key: whether key exists
        """
        return key in self.dict

    def contains_value(self, value):
        """
        :key: whether value exists
        """
        return value in self.dict_reversed

    def keys(self):
        """
        :key: all keys
        """
        return self.dict.keys()

    def values(self):
        """
        :key: all values (the keys of the reverse map)
        """
        return self.dict_reversed.keys()

    def items(self):
        """
        :key: all (key, value) items
        """
        return self.dict.items()


class CorpusSet(object):
    """
    Corpus container; base class of LdaBase.
    """

    def __init__(self):
        """
        :key: initialise an empty corpus
        """
        # Word-related state
        self.local_bi = BiDictionary()      # local id <-> word dictionary (key: id, value: word)
        self.words_count = 0                # number of words in the corpus, duplicates included
        self.V = 0                          # number of distinct words in the corpus

        # Article-related state
        self.artids_list = []               # article ids, in reading order
        self.arts_Z = []                    # word ids of every article, M rows of per-article length
        self.M = 0                          # number of articles

        # State used for inference only (may stay empty)
        self.global_bi = None               # global id <-> word dictionary (key: id, value: word)
        self.local_2_global = {}            # local word id -> global word id

    def init_corpus_with_file(self, file_name):
        """
        :key: load the corpus from a UTF-8 file; each line is: id[tab]word1 word2 word3......
        """
        with open(file_name, "r", encoding="utf-8") as file_iter:
            self.init_corpus_with_articles(file_iter)

    def init_corpus_with_articles(self, article_list):
        """
        :key: load the corpus from an iterable of articles; each article is: id[tab]word1 word2 word3......
        """
        # Reset word data
        self.local_bi.clear()
        self.words_count = 0
        self.V = 0

        # Reset article data
        self.artids_list.clear()
        self.arts_Z.clear()
        self.M = 0

        # Reset the local -> global mapping
        self.local_2_global.clear()

        for line in article_list:
            frags = line.strip().split()
            if len(frags) < 2:
                # need an id and at least one word
                continue

            art_id = frags[0]
            art_wordid_list = []
            for word in frags[1:]:
                # NOTE: global_bi being None (training) is different from it being empty (inference)
                if self.global_bi is not None and not self.global_bi.contains_value(word):
                    # inference: words unknown to the trained model are dropped
                    continue

                # reuse the id of a known word, otherwise allocate the next free id
                local_id = self.local_bi.get_key(word) if self.local_bi.contains_value(word) else len(self.local_bi)
                self.local_bi.add_key_value(local_id, word)
                art_wordid_list.append(local_id)

                if self.global_bi is not None:
                    self.local_2_global[local_id] = self.global_bi.get_key(word)

            # only articles that kept at least one word are stored
            if art_wordid_list:
                self.words_count += len(art_wordid_list)
                self.artids_list.append(art_id)
                self.arts_Z.append(art_wordid_list)

        # Derived counters -- words
        self.V = len(self.local_bi)
        logging.debug("words number: %s, %s", self.V, self.words_count)

        # Derived counters -- articles
        self.M = len(self.artids_list)
        logging.debug("articles number: %s", self.M)

    def save_wordmap(self, file_name):
        """
        :key: write the word dictionary (self.local_bi) to file_name
        """
        with open(file_name, "w", encoding="utf-8") as f_save:
            f_save.write(str(self.local_bi))

    def load_wordmap(self, file_name):
        """
        :key: read the word dictionary (self.local_bi) from file_name and refresh self.V
        """
        self.local_bi.clear()
        with open(file_name, "r", encoding="utf-8") as f_load:
            for line in f_load:
                if not line.strip():
                    continue
                _id, _word = line.strip().split()
                self.local_bi.add_key_value(int(_id), _word)
        self.V = len(self.local_bi)


class LdaBase(CorpusSet):
    """
    Base class of the LDA model. Index conventions:
    - articles:            [0, self.M), index m
    - word ids:            [0, self.V), index w
    - topics:              [0, self.K), index k or topic
    - words in an article: [0, len(article)), index n
    """

    def __init__(self):
        """
        :key: initialise every model variable to an empty default
        """
        CorpusSet.__init__(self)

        # Basic variables -- 1
        self.dir_path = ""          # directory holding model data and intermediate results
        self.model_name = ""        # model name, also used to locate saved results
        self.current_iter = 0       # iterations already done, used to resume training
        self.iters_num = 0          # total number of Gibbs iterations: an int or "auto"
        self.topics_num = 0         # number of topics, same as self.K
        self.K = 0                  # number of topics, same as self.topics_num
        self.twords_num = 0         # number of words written per topic after training / inference

        # Basic variables -- 2
        self.alpha = numpy.zeros(self.K)        # hyper-parameter alpha, K floats, default 50/K
        self.beta = numpy.zeros(self.V)         # hyper-parameter beta, V floats, default 0.01

        # Basic variables -- 3
        self.Z = []                 # topic of every word, Z(m, n); M rows of per-article length

        # Count statistics (derivable from self.Z)
        self.nd = numpy.zeros((self.M, self.K))     # nd[m, k]: words of article m assigned to topic k, M * K
        self.ndsum = numpy.zeros((self.M, 1))       # ndsum[m, 0]: total words of article m, M * 1
        self.nw = numpy.zeros((self.K, self.V))     # nw[k, w]: occurrences of word w assigned to topic k, K * V
        self.nwsum = numpy.zeros((self.K, 1))       # nwsum[k, 0]: total words assigned to topic k, K * 1

        # Multinomial parameters
        self.theta = numpy.zeros((self.M, self.K))  # doc-topic distribution, M * K, smoothed by alpha
        self.phi = numpy.zeros((self.K, self.V))    # topic-word distribution, K * V, smoothed by beta

        # Cached sums of the hyper-parameters
        self.sum_alpha = 0.0
        self.sum_beta = 0.0

        # Prior knowledge: {word_id: [k1, k2, ...], ...}
        self.prior_word = defaultdict(list)

        # Trained model used during inference
        self.train_model = None

    # -------------------------------------------------- helpers --------------------------------------------------
    def init_statistics_document(self):
        """
        :key: rebuild the per-article counts nd / ndsum. Requires self.M, self.K, self.Z
        """
        assert self.M > 0 and self.K > 0 and self.Z

        # ``int`` instead of ``numpy.int``: the alias was removed in NumPy 1.24
        self.nd = numpy.zeros((self.M, self.K), dtype=int)
        self.ndsum = numpy.zeros((self.M, 1), dtype=int)

        for m in range(self.M):
            for k in self.Z[m]:
                self.nd[m, k] += 1
            self.ndsum[m, 0] = len(self.Z[m])

    def init_statistics_word(self):
        """
        :key: rebuild the per-topic word counts nw / nwsum. Requires self.V, self.K, self.Z, self.arts_Z
        """
        assert self.V > 0 and self.K > 0 and self.Z and self.arts_Z

        # ``int`` instead of ``numpy.int``: the alias was removed in NumPy 1.24
        self.nw = numpy.zeros((self.K, self.V), dtype=int)
        self.nwsum = numpy.zeros((self.K, 1), dtype=int)

        for m in range(self.M):
            for k, w in zip(self.Z[m], self.arts_Z[m]):
                self.nw[k, w] += 1
                self.nwsum[k, 0] += 1

    def init_statistics(self):
        """
        :key: rebuild all count statistics (both functions above)
        """
        self.init_statistics_document()
        self.init_statistics_word()

    def sum_alpha_beta(self):
        """
        :key: cache the sums of alpha and beta
        """
        self.sum_alpha = self.alpha.sum()
        self.sum_beta = self.beta.sum()

    def calculate_theta(self):
        """
        :key: compute theta (M * K) from nd, ndsum and alpha
        """
        assert self.sum_alpha > 0
        self.theta = (self.nd + self.alpha) / (self.ndsum + self.sum_alpha)

    def calculate_phi(self):
        """
        :key: compute phi (K * V) from nw, nwsum and beta
        """
        assert self.sum_beta > 0
        self.phi = (self.nw + self.beta) / (self.nwsum + self.sum_beta)

    # --------------------------------------------- perplexity ------------------------------------------------------
    def calculate_perplexity(self):
        """
        :key: recompute theta and phi, then return the perplexity of the corpus
        """
        self.calculate_theta()
        self.calculate_phi()

        log_likelihood = 0.0
        for m in range(self.M):
            for w in self.arts_Z[m]:
                log_likelihood += numpy.log(numpy.sum(self.theta[m] * self.phi[:, w]))
        return numpy.exp(-(log_likelihood / self.words_count))

    # -------------------------------------------------- static -----------------------------------------------------
    @staticmethod
    def multinomial_sample(pro_list):
        """
        :key: draw one index from an unnormalised multinomial distribution; pro_list is not modified
        :param pro_list: e.g. [0.2, 0.7, 0.4, 0.1] -- index 1 is the most likely result, but not the only one
        """
        # Running totals are built on a copy, so the caller's array stays intact
        cumulative = numpy.cumsum(pro_list)

        # u is uniform in [0, total); the sampled class is the first index whose running total exceeds u
        u = numpy.random.rand() * cumulative[-1]
        index = int(numpy.searchsorted(cumulative, u, side="right"))

        # fall back to the last index when no running total exceeds u
        return min(index, len(cumulative) - 1)

    # ---------------------------------------------- Gibbs sampling -------------------------------------------------
    def gibbs_sampling(self, is_calculate_preplexity):
        """
        :key: the Gibbs sampling loop of the LDA model
        :param is_calculate_preplexity: whether to compute the perplexity on every iteration;
            with iters_num == "auto" the early stop needs it, otherwise MAX_ITER_NUM iterations run
        """
        # Perplexity history and the variance of its latest window
        pp_list = []
        pp_var = numpy.inf

        last_iter = self.current_iter + 1
        iters_num = self.iters_num if self.iters_num != "auto" else MAX_ITER_NUM
        for self.current_iter in range(last_iter, last_iter+iters_num):
            info = "......"

            if is_calculate_preplexity:
                pp = self.calculate_perplexity()
                pp_list.append(pp)

                # variance of the latest VAR_NUM perplexity values
                pp_var = numpy.var(pp_list[-VAR_NUM:]) if len(pp_list) >= VAR_NUM else numpy.inf
                info = (", preplexity: " + str(pp)) + ((", var: " + str(pp_var)) if len(pp_list) >= VAR_NUM else "")

            logging.debug("\titeration " + str(self.current_iter) + info)

            # "auto" mode stops once the perplexity has settled
            if self.iters_num == "auto" and pp_var < (VAR_NUM / 2):
                break

            # Re-sample the topic of every word of every article
            for m in range(self.M):
                for n in range(len(self.Z[m])):
                    w = self.arts_Z[m][n]
                    k = self.Z[m][n]

                    # take the current assignment out of the counts
                    self.nd[m, k] -= 1
                    self.ndsum[m, 0] -= 1
                    self.nw[k, w] -= 1
                    self.nwsum[k, 0] -= 1

                    if self.prior_word and (w in self.prior_word):
                        # prior knowledge: pick among the topics listed for this word
                        k = numpy.random.choice(self.prior_word[w])
                    else:
                        # doc-topic factor for word n of article m
                        theta_p = (self.nd[m] + self.alpha) / (self.ndsum[m, 0] + self.sum_alpha)

                        # topic-word factor: inference adds the trained model's counts (note self.beta[w_g])
                        if self.local_2_global and self.train_model:
                            w_g = self.local_2_global[w]
                            phi_p = (self.train_model.nw[:, w_g] + self.nw[:, w] + self.beta[w_g]) / \
                                    (self.train_model.nwsum[:, 0] + self.nwsum[:, 0] + self.sum_beta)
                        else:
                            phi_p = (self.nw[:, w] + self.beta[w]) / (self.nwsum[:, 0] + self.sum_beta)

                        # unnormalised multinomial parameters
                        multi_p = theta_p * phi_p

                        # topics with a larger weight are more likely to be drawn
                        k = LdaBase.multinomial_sample(multi_p)

                    # put the new assignment back into the counts
                    self.nd[m, k] += 1
                    self.ndsum[m, 0] += 1
                    self.nw[k, w] += 1
                    self.nwsum[k, 0] += 1

                    self.Z[m][n] = k

    # ----------------------------------------- model persistence ---------------------------------------------------
    def save_parameter(self, file_name):
        """
        :key: save the model parameters: topics_num, M, V, K, words_count, alpha, beta
        """
        with open(file_name, "w", encoding="utf-8") as f_param:
            for item in ["topics_num", "M", "V", "K", "words_count"]:
                f_param.write("%s\t%s\n" % (item, str(getattr(self, item))))
            f_param.write("alpha\t%s\n" % ",".join([str(item) for item in self.alpha]))
            f_param.write("beta\t%s\n" % ",".join([str(item) for item in self.beta]))

    def load_parameter(self, file_name):
        """
        :key: load the model parameters written by save_parameter
        """
        with open(file_name, "r", encoding="utf-8") as f_param:
            for line in f_param:
                if not line.strip():
                    # tolerate blank lines, e.g. a trailing newline added by an editor
                    continue
                key, value = line.strip().split()
                if key in ["topics_num", "M", "V", "K", "words_count"]:
                    setattr(self, key, int(value))
                elif key in ["alpha", "beta"]:
                    setattr(self, key, numpy.array([float(item) for item in value.split(",")]))

    def save_zvalue(self, file_name):
        """
        :key: save the per-article data: artids_list, arts_Z and Z as "id<TAB>w:k w:k ..."
        """
        with open(file_name, "w", encoding="utf-8") as f_zvalue:
            for m in range(self.M):
                out_line = [str(w) + ":" + str(k) for w, k in zip(self.arts_Z[m], self.Z[m])]
                f_zvalue.write(self.artids_list[m] + "\t" + " ".join(out_line) + "\n")

    def load_zvalue(self, file_name):
        """
        :key: load artids_list, arts_Z and Z from a file written by save_zvalue
        """
        self.arts_Z = []
        self.artids_list = []
        self.Z = []
        with open(file_name, "r", encoding="utf-8") as f_zvalue:
            for line in f_zvalue:
                frags = line.strip().split()
                if not frags:
                    # tolerate blank lines
                    continue
                w_k_list = [value.split(":") for value in frags[1:]]
                self.artids_list.append(frags[0])
                self.arts_Z.append([int(item[0]) for item in w_k_list])
                self.Z.append([int(item[1]) for item in w_k_list])

    def save_twords(self, file_name):
        """
        :key: save the top words of every topic, ranked by phi
        """
        self.calculate_phi()
        out_num = min(self.twords_num, self.V)
        with open(file_name, "w", encoding="utf-8") as f_twords:
            for k in range(self.K):
                words_list = sorted([(w, self.phi[k, w]) for w in range(self.V)], key=lambda x: x[1], reverse=True)
                f_twords.write("Topic %dth:\n" % k)
                f_twords.writelines(["\t%s %f\n" % (self.local_bi.get_value(w), p) for w, p in words_list[:out_num]])

    def load_twords(self, file_name):
        """
        :key: load a twords file as prior knowledge (word id -> list of topics)
        """
        self.prior_word.clear()
        topic = -1
        with open(file_name, "r", encoding="utf-8") as f_twords:
            for line in f_twords:
                if line.startswith("Topic"):
                    # header lines look like "Topic 3th:"
                    topic = int(line.strip()[6:-3])
                    continue
                frags = line.strip().split()
                if not frags:
                    # tolerate blank lines
                    continue
                word_id = self.local_bi.get_key(frags[0])
                if word_id is None:
                    # the word is not part of this corpus: a prior for it could never be used
                    continue
                self.prior_word[word_id].append(topic)

    def save_tag(self, file_name):
        """
        :key: save the topic distribution (theta) of every article
        """
        self.calculate_theta()
        with open(file_name, "w", encoding="utf-8") as f_tag:
            for m in range(self.M):
                f_tag.write("%s\t%s\n" % (self.artids_list[m], " ".join([str(item) for item in self.theta[m]])))

    def save_model(self):
        """
        :key: save the whole model under "<model_name>-<current_iter>.<suffix>" in dir_path
        """
        name_prefix = "%s-%05d" % (self.model_name, self.current_iter)

        # training results
        self.save_parameter(os.path.join(self.dir_path, "%s.%s" % (name_prefix, "param")))
        self.save_wordmap(os.path.join(self.dir_path, "%s.%s" % (name_prefix, "wordmap")))
        self.save_zvalue(os.path.join(self.dir_path, "%s.%s" % (name_prefix, "zvalue")))

        # additional outputs
        self.save_twords(os.path.join(self.dir_path, "%s.%s" % (name_prefix, "twords")))
        self.save_tag(os.path.join(self.dir_path, "%s.%s" % (name_prefix, "tag")))

    def load_model(self):
        """
        :key: load the training results saved by save_model (param, wordmap, zvalue)
        """
        name_prefix = "%s-%05d" % (self.model_name, self.current_iter)

        self.load_parameter(os.path.join(self.dir_path, "%s.%s" % (name_prefix, "param")))
        self.load_wordmap(os.path.join(self.dir_path, "%s.%s" % (name_prefix, "wordmap")))
        self.load_zvalue(os.path.join(self.dir_path, "%s.%s" % (name_prefix, "zvalue")))


class LdaModel(LdaBase):
    """
    The LDA model: training, resumed training and inference.
    """

    def init_train_model(self, dir_path, model_name, current_iter, iters_num=None, topics_num=10, twords_num=200,
                         alpha=-1.0, beta=0.01, data_file="", prior_file=""):
        """
        :key: initialise a training model; current_iter == 0 creates a new model, any other value loads a saved one
        :key: a new model needs every argument except prior_file, with current_iter equal to 0
        :key: a saved model only needs dir_path, model_name, current_iter (non-zero), iters_num and twords_num
        :param iters_num: an integer or "auto"; it must be set before sampling starts
        """
        if current_iter == 0:
            logging.debug("init a new train model")

            # load the corpus
            self.init_corpus_with_file(data_file)

            # basic variables
            self.dir_path = dir_path
            self.model_name = model_name
            self.current_iter = current_iter
            self.iters_num = iters_num
            self.topics_num = topics_num
            self.K = topics_num
            self.twords_num = twords_num

            # alpha and beta: non-positive arguments select the defaults 50/K and 0.01
            self.alpha = numpy.array([alpha if alpha > 0 else (50.0/self.K) for k in range(self.K)])
            self.beta = numpy.array([beta if beta > 0 else 0.01 for w in range(self.V)])

            # random initial topic for every word, so the counts can be built
            self.Z = [[numpy.random.randint(self.K) for n in range(len(self.arts_Z[m]))] for m in range(self.M)]
        else:
            logging.debug("init an existed model")

            # basic variables
            self.dir_path = dir_path
            self.model_name = model_name
            self.current_iter = current_iter
            self.iters_num = iters_num
            self.twords_num = twords_num

            # load the saved model
            self.load_model()

        # count statistics
        self.init_statistics()

        # sums of alpha and beta
        self.sum_alpha_beta()

        # prior knowledge
        if prior_file:
            self.load_twords(prior_file)

        return self

    def begin_gibbs_sampling_train(self, is_calculate_preplexity=True):
        """
        :key: train the model: Gibbs-sample the whole corpus, then save the result
        """
        logging.debug("sample iteration start, iters_num: " + str(self.iters_num))
        self.gibbs_sampling(is_calculate_preplexity)
        logging.debug("sample iteration finish")

        logging.debug("save model")
        self.save_model()

    def init_inference_model(self, train_model):
        """
        :key: initialise an inference model from a trained model
        """
        self.train_model = train_model

        # topic count
        self.topics_num = train_model.topics_num
        self.K = train_model.K

        # alpha and beta are taken over from the trained model
        self.alpha = train_model.alpha      # K floats; K is identical for training and inference
        self.beta = train_model.beta        # V floats, indexed by the global word id during inference
        self.sum_alpha_beta()

        # the trained model's dictionary becomes the global dictionary
        self.global_bi = train_model.local_bi

    def inference_data(self, article_list, iters_num=100, repeat_num=3):
        """
        :key: infer the topic distribution of new articles with the trained model
        :param article_list: each item is: id[tab]word1 word2 word3......
        :param iters_num: Gibbs iterations per run
        :param repeat_num: number of independent runs whose theta values are averaged
        """
        # load the corpus
        self.init_corpus_with_articles(article_list)

        return_theta = numpy.zeros((self.M, self.K))

        for i in range(repeat_num):
            logging.debug("inference repeat_num: " + str(i+1))

            self.current_iter = 0
            self.iters_num = iters_num

            # random initial topic for every word, so the counts can be built
            self.Z = [[numpy.random.randint(self.K) for n in range(len(self.arts_Z[m]))] for m in range(self.M)]

            self.init_statistics()

            self.gibbs_sampling(is_calculate_preplexity=False)

            self.calculate_theta()
            return_theta += self.theta

        # average over the runs
        return return_theta / repeat_num


if __name__ == "__main__":
    """
    Test code
    """
    logging.basicConfig(level=logging.DEBUG, format="%(asctime)s\t%(levelname)s\t%(message)s")

    # "train" or "inference"
    test_type = "train"
    # test_type = "inference"

    if test_type == "train":
        model = LdaModel()
        # prior_file decides whether prior knowledge is used
        model.init_train_model("data/", "model", current_iter=0, iters_num="auto", topics_num=10, data_file="corpus.txt")
        # model.init_train_model("data/", "model", current_iter=0, iters_num="auto", topics_num=10, data_file="corpus.txt", prior_file="prior.twords")
        model.begin_gibbs_sampling_train()
    elif test_type == "inference":
        model = LdaModel()
        model.init_inference_model(LdaModel().init_train_model("data/", "model", current_iter=134))
        data = [
            "cn 咪咕 漫画 咪咕 漫画 漫画 更名 咪咕 漫画 资源 偷星 国漫 全彩 日漫 实时 在线看 随心所欲 登陆 漫画 资源 黑白 全彩 航海王",
            "co aircloud aircloud 硬件 设备 wifi 智能 手要 平板电脑 电脑 存储 aircloud 文件 远程 型号 aircloud 硬件 设备 wifi"
        ]
        result = model.inference_data(data)
# --------------------------------------------------------------------------------
# /python_magic_methods.py:
# --------------------------------------------------------------------------------
# _*_ coding: utf-8 _*_

"""
python_magic_methods.py by xianhu
"""


# A People class whose instances can be ordered automatically
class People(object):

    def __init__(self, name, age):
        self.name = name
        self.age = age

    def __str__(self):
        return self.name + ":" + str(self.age)

    def __lt__(self, other):
        # order by name first, by age when the names are equal
        return self.name < other.name if self.name != other.name else self.age < other.age

print("\t".join([str(item) for item in sorted([People("abc", 18), People("abe", 19), People("abe", 12), People("abc", 17)])]))


# Assignment at arbitrary depth, e.g. a[0] = 'value1'; a[1][2] = 'value2'; a[3][4][5] = 'value3'
class MyDict(dict):

    def __setitem__(self, key, value):                  # unchanged behaviour, overridden only to trace the call
        print("setitem:", key, value, self)
        super().__setitem__(key, value)

    def __getitem__(self, item):                        # the trick lives here
        print("getitem:", item, self)
        # Idea: assigning a[1][2] first fetches a[1], then assigns [2] on the result
        if item not in self:                            # a[1] does not exist yet
            temp = MyDict()                             # so create a new dict
            super().__setitem__(item, temp)             # store it as a[1]
            return temp                                 # and return it, which makes a[1][2] = value work
        return super().__getitem__(item)                # a[1] exists: return it

# Example:
test = MyDict()
test[0] = 'test'
test[1][2] = 'test1'
test[3][4][5] = 'test2'
print("==========================")
# --------------------------------------------------------------------------------
# /python_markov_chain.py:
# --------------------------------------------------------------------------------
# _*_ coding: utf-8
import random


def makePairs(arr):
    """Return the list of adjacent (word, next_word) pairs of the sequence *arr*."""
    return list(zip(arr, arr[1:]))


def generate(cfd, word='the', num=500):
    """
    Print a random walk of at most *num* words, separated by spaces.

    :param cfd: mapping word -> mapping successor -> count, e.g. an
        nltk.ConditionalFreqDist built from makePairs()
    :param word: word the walk starts from
    :param num: maximum number of words to print

    The walk stops early when the current word has no successor; the previous
    implementation raised IndexError in that case.
    """
    for _ in range(num):
        print(word, end=' ')

        # Choose the next word with probability proportional to its count
        successors = cfd[word]
        candidates = [nxt for nxt in successors if successors[nxt] > 0]
        if not candidates:
            break
        weights = [successors[nxt] for nxt in candidates]
        word = random.choices(candidates, weights=weights)[0]


def main():
    """Build the chain from Text/Walden.txt and print 500 generated words."""
    # Third-party dependency, imported here so the helpers above stay importable without it
    import nltk

    # The context manager closes the file; the original handle was never closed
    with open('Text/Walden.txt', 'r') as file:
        walden = file.read().split()

    pairs = makePairs(walden)
    cfd = nltk.ConditionalFreqDist(pairs)
    generate(cfd)


if __name__ == "__main__":
    main()

# --------------------------------------------------------------------------------
# /python_metaclass.py:
# --------------------------------------------------------------------------------
# _*_ coding: utf-8 _*_

"""
python_metaclass.py by xianhu
"""


class Foo(object):
    def hello(self):
        print("hello world!")
        return

foo = Foo()
print(type(foo))            # <class '__main__.Foo'>
print(type(foo.hello))      # <class 'method'>
print(type(Foo))            # <class 'type'>

temp = Foo                  # a class can be assigned to another variable
Foo.var = 11                # attributes can be added to it
print(Foo)                  # and it can be passed as a function argument


# ========================================================================
def init(self, name):
    self.name = name
    return


def hello(self):
    print("hello %s" % self.name)
    return

# type(name, bases, namespace) builds the same kind of class dynamically
Foo = type("Foo", (object,), {"__init__": init, "hello": hello, "cls_var": 10})
foo = Foo("xianhu")
foo.hello()                 # hello() prints by itself; wrapping it in print() also printed "None"
print(Foo.cls_var)

print(foo.__class__)
print(Foo.__class__)
print(type.__class__)
# ========================================================================


class Author(type):
    """Metaclass that adds an ``author`` attribute to every class it creates."""

    def __new__(mcs, name, bases, namespace):
        # Add the author attribute to the class namespace
        namespace["author"] = "xianhu"
        return super().__new__(mcs, name, bases, namespace)


class Foo(object, metaclass=Author):
    pass

foo = Foo()
print(foo.author)
# --------------------------------------------------------------------------------
# /python_numpy.py:
# --------------------------------------------------------------------------------
# _*_ coding: utf-8 _*_
import numpy as np

# Define a matrix and print some of its attributes
# np.array() builds a matrix from nested lists
arr = np.array([[1, 2, 3],
                [4, 5, 6]])

print(arr)
print('number of arr dimensions: ', arr.ndim)
print('~ ~ ~ shape: ', arr.shape)
print('~ ~ ~ size: ', arr.size)

# Output (kept as comments; pasted verbatim it was a SyntaxError):
# [[1 2 3]
#  [4 5 6]]
# number of arr dimensions:  2
# ~ ~ ~ shape:  (2, 3)
# ~ ~ ~ size:  6

# Some special matrices
# Specify the element type of a matrix
arr = np.array([[1, 2, 3],
                [4, 5, 6]],
               dtype=np.float64)  # np.int32 / np.int64 / np.float32 / np.float64 can be used as well
print(arr.dtype)

# np.zeros() builds an all-zero matrix
arr_zeros = np.zeros((2, 3))
print(arr_zeros)

# np.ones() builds an all-one matrix
arr_ones = np.ones((2, 3))
print(arr_ones)

# np.random.random() builds a random matrix
arr_random = np.random.random((2, 3))
print(arr_random)

# np.arange() builds a sequence
arr = np.arange(6, 12)
print(arr)

# np.arange().reshape() turns the sequence into a matrix
arr = np.arange(6, 12).reshape((2, 3))
print(arr)

# np.linspace(start, stop, number of points); reshape() works here as well
arr = np.linspace(1, 5, 3)
print(arr)

# Matrix arithmetic
arr1 = np.array([1, 2, 3, 6])
arr2 = np.arange(4)

# Subtraction; addition works the same way
arr_sub = arr1 - arr2
print(arr1)
print(arr2)
print(arr_sub)

# Multiplication
arr_multi = arr1 ** 3  # cube of every element; ** is Python's power operator
print(arr_multi)

arr_multi = arr1 * arr2  # element-wise product
print(arr_multi)

arr_multi = np.dot(arr1, arr2.reshape((4, 1)))  # product of a 1*4 and a 4*1 matrix
print(arr_multi)

arr_multi=np.dot(arr1.reshape((4,1)), arr2.reshape((1,4))) # 维度4*1和1*4矩阵相乘 72 | print(arr_multi) 73 | 74 | arr_multi=arr1.dot(arr2.reshape((4,1))) # 也可以使用矩阵名.doc(矩阵名) 75 | print(arr_multi) 76 | 77 | # 三角运算:np.sin()/np.cos()/np.tan() 78 | arr_sin=np.sin(arr1) 79 | print(arr_sin) 80 | 81 | # 逻辑运算 82 | print(arr1<3) # 查看arr1矩阵中哪些元素小于3,返回[ True True False False] 83 | 84 | # 矩阵求和,求矩阵最大最小值 85 | arr1=np.array([[1,2,3], 86 | [4,5,6]]) 87 | print(arr1) 88 | print(np.sum(arr1)) # 矩阵求和 89 | print(np.sum(arr1,axis=0)) # 矩阵每列求和 90 | print(np.sum(arr1,axis=1).reshape(2,1)) # 矩阵每行求和 91 | 92 | print(np.min(arr1)) # 求矩阵最小值 93 | print(np.min(arr1,axis=0)) 94 | print(np.min(arr1,axis=1)) 95 | 96 | print(np.max(arr1)) # 求矩阵最大值 97 | 98 | print(np.mean(arr1)) # 输出矩阵平均值,也可以用arr1.mean() 99 | print(np.median(arr1)) # 输出矩阵中位数 100 | 101 | # 输出矩阵某些值的位置 102 | arr1=np.arange(2,14).reshape((3,4)) 103 | print(arr1) 104 | 105 | print(np.argmin(arr1)) # 输出矩阵最小值的位置,0 106 | print(np.argmax(arr1)) # 输出矩阵最大值的位置,11 107 | 108 | print(np.cumsum(arr1)) # 输出前一个数的和,前两个数的和,等等 109 | print(np.diff(arr1)) # 输出相邻两个数的差值 110 | 111 | arr_zeros=np.zeros((3,4)) 112 | print(np.nonzero(arr_zeros)) #输出矩阵非零元素位置,返回多个行向量,第i个行向量表示第i个维度 113 | print(np.nonzero(arr1)) 114 | 115 | print(np.sort(arr1)) # 矩阵逐行排序 116 | print(np.transpose(arr1)) # 矩阵转置,也可以用arr1.T 117 | 118 | print(np.clip(arr1,5,9)) #将矩阵中小于5的数置5,大于9的数置9 119 | 120 | # numpy索引 121 | arr1=np.array([1,2,3,6]) 122 | arr2=np.arange(2,8).reshape(2,3) 123 | 124 | print(arr1) 125 | print(arr1[0]) # 索引从0开始计数 126 | 127 | print(arr2) 128 | print(arr2[0][2]) # arr[行][列],也可以用arr[行,列] 129 | print(arr2[0,:]) # 用:来代表所有元素的意思 130 | print(arr2[0,0:3]) # 表示输出第0行,从第0列到第2列所有元素 131 | # 注意python索引一般是左闭右开 132 | 133 | # 通过for循环每次输出矩阵的一行 134 | for row in arr2: 135 | print(row) 136 | 137 | # 如果要每次输出矩阵的一列,就先将矩阵转置 138 | arr2_T=arr2.T 139 | print(arr2_T) 140 | for row in arr2_T: 141 | print(row) 142 | 143 | # 将矩阵压成一行逐个输出元素 144 | arr2_flat=arr2.flatten() 145 | print(arr2_flat) 146 | 147 | for i in 
arr2.flat: # 也可以用arr2.flatten() 148 | print(i) 149 | 150 | # 矩阵合并与分割 151 | # 矩阵合并 152 | arr1=np.array([1,2,3,6]) 153 | arr2=np.arange(4) 154 | arr3=np.arange(2,16+1,2).reshape(2,4) 155 | print(arr1) 156 | print(arr2) 157 | print(arr3) 158 | 159 | arr_hor=np.hstack((arr1,arr2)) # 水平合并,horizontal 160 | arr_ver=np.vstack((arr1,arr3)) # 垂直合并,vertical 161 | print(arr_hor) 162 | print(arr_ver) 163 | 164 | # 矩阵分割 165 | print('arr3: ',arr3) 166 | print(np.split(arr3,4,axis=1)) # 将矩阵按列均分成4块 167 | print(np.split(arr3,2,axis=0)) # 将矩阵按行均分成2块 168 | print(np.hsplit(arr3,4)) # 将矩阵按列均分成4块 169 | print(np.vsplit(arr3,2)) # 将矩阵按行均分成2块 170 | print(np.array_split(arr3,3,axis=1)) # 将矩阵进行不均等划分 171 | 172 | # numpy复制:浅复制,深复制 173 | # 浅复制 174 | arr1=np.array([3,1,2,3]) 175 | print(arr1) 176 | a1=arr1 177 | b1=a1 178 | # 通过上述赋值运算,arr1,a1,b1都指向了同一个地址(浅复制) 179 | print(a1 is arr1) 180 | print(b1 is arr1) 181 | print(id(a1)) 182 | print(id(b1)) 183 | print(id(arr1)) 184 | 185 | # 会发现通过b1[0]改变内容,arr1,a1,b1的内容都改变了 186 | b1[0]=6 187 | print(b1) 188 | print(a1) 189 | print(arr1) 190 | 191 | # 深复制 192 | arr2=np.array([3,1,2,3]) 193 | print('\n') 194 | print(arr2) 195 | b2=arr2.copy() # 深复制,此时b2拥有不同于arr2的空间 196 | a2=b2.copy() 197 | # 通过上述赋值运算,arr1,a1,b1都指向了不同的地址(深复制) 198 | print(id(arr2)) 199 | print(id(a2)) 200 | print(id(b2)) 201 | # 此时改变b2,a2的值,互不影响 202 | b2[0]=1 203 | a2[0]=2 204 | print(b2) 205 | print(a2) 206 | print(arr2) 207 | 208 | # 线性代数模块(linalg) 209 | # 求范数 210 | a=np.array([5,12]) 211 | print(a) 212 | b=np.linalg.norm(a) # norm表示范数,默认求2范数,ord=1求1范数,ord=np.inf求无穷范数 213 | print(b) 214 | 215 | # 求矩阵的迹、行列式、秩、特征值、特征向量 216 | b = np.array([ 217 | [1, 2, 3], 218 | [4, 5, 6], 219 | [7, 8, 9] 220 | ]) 221 | 222 | print(np.trace(b)) # 15,求矩阵的迹(主对角线上各个元素的总和) 223 | 224 | c=np.linalg.det(b) 225 | print(c) # 输出一个很小的值6.66133814775e-16,求矩阵的行列式值 226 | # 如果希望输出为0,使用round(c, 2),四舍五入保留小数点后两位 227 | # 不过对精度要求高可以使用decimal模块 228 | 229 | c=np.linalg.matrix_rank(b) 230 | print(c) # 2,求矩阵的秩 231 | 232 | 
u,v=np.linalg.eig(b) # u为特征值 233 | print(u) 234 | print(v) 235 | 236 | # 矩阵分解 237 | # Cholesky分解并重建 238 | d = np.array([ 239 | [2, 1], 240 | [1, 2] 241 | ]) 242 | 243 | l = np.linalg.cholesky(d) 244 | print(l) # 得到下三角矩阵 245 | e=np.dot(l, l.T) 246 | print(e) # 重建得到矩阵d 247 | 248 | 249 | # 对不正定矩阵,进行SVD分解并重建 250 | U, s, V = np.linalg.svd(d) 251 | 252 | S = np.array([ 253 | [s[0], 0], 254 | [0, s[1]] 255 | ]) 256 | 257 | print(np.dot(U, np.dot(S, V))) # 重建得到矩阵d 258 | 259 | # 矩阵乘法 260 | # https://docs.scipy.org/doc/numpy/reference/generated/numpy.dot.html#numpy.dot 261 | print(np.dot(3, 4)) # 12,0-D矩阵相乘(也就是标量相乘) 262 | 263 | print(np.dot([2j, 3j], [2j, 3j])) # (-13+0j),1-D矩阵相乘(实际上是向量做点积) 264 | 265 | a=[[1, 0], [0, 1]] 266 | b=[[4, 1, 0], [2, 2, 0]] 267 | print(np.dot(a, b)) 268 | ''' 269 | array([[4, 1], 270 | [2, 2]]) 271 | 2-D矩阵相乘 272 | 这里是2*2矩阵和2*3矩阵相乘,结果为2*3矩阵 273 | ''' 274 | 275 | a=[[1, 0], [1, 2]] 276 | b=[2,2] 277 | c=np.dot(a,b) 278 | print(c) 279 | ''' 280 | [2 6] 281 | 注意这里b是向量 282 | numpy处理时并不是按照矩阵乘法规则计算 283 | 而是向量点积 284 | 也就是np.dot([1, 0],[1, 2])和np.dot([1, 2],[2,2]) 285 | ''' 286 | 287 | # 再做个实验来区别向量乘法和矩阵乘法 288 | b=np.array([ 289 | [1, 2, 3], 290 | [4, 5, 6], 291 | [7, 8, 9] 292 | ]) 293 | 294 | # 这里插播一下,np.array([1,0,1])是3维向量,而不是1*3的矩阵 295 | c1=np.array([[1,0,2]]) 296 | print(c1.shape) # (1, 3),这是一个1*3的矩阵 297 | c2=np.array([1,0,2]) 298 | print(c2.shape) # (3,),这是一个3维向量 299 | 300 | # print(np.dot(b,c1)) # 报错,不符合矩阵乘法规则 301 | print(np.dot(b,c2)) # [ 7 16 25],点积运算 302 | 303 | print(np.dot(c1,b)) # [[15 18 21]],矩阵乘法运算规则 304 | print(np.dot(c2,b)) # [15 18 21],点积运算 305 | 306 | # 还要补充一下,如果是用python自带的*运算符计算则是广播机制 307 | print(b*c1) # print(b*c2)结果一样 308 | ''' 309 | [[ 1 0 6] 310 | [ 4 0 12] 311 | [ 7 0 18]] 312 | ''' 313 | print(b+c1) # print(b*c2)结果一样 314 | ''' 315 | [[ 2 2 5] 316 | [ 5 5 8] 317 | [ 8 8 11]] 318 | ''' 319 | -------------------------------------------------------------------------------- /python_oneline.py: 
# --------------------------------------------------------------------------------
# _*_ coding: utf-8 _*_

"""
python_oneline.py by xianhu
"""


# First of all, the Zen of Python
# python -c "import this"
"""
The Zen of Python, by Tim Peters

Beautiful is better than ugly.
Explicit is better than implicit.
Simple is better than complex.
Complex is better than complicated.
Flat is better than nested.
Sparse is better than dense.
Readability counts.
Special cases aren't special enough to break the rules.
Although practicality beats purity.
Errors should never pass silently.
Unless explicitly silenced.
In the face of ambiguity, refuse the temptation to guess.
There should be one-- and preferably only one --obvious way to do it.
Although that way may not be obvious at first unless you're Dutch.
Now is better than never.
Although never is often better than *right* now.
If the implementation is hard to explain, it's a bad idea.
If the implementation is easy to explain, it may be a good idea.
Namespaces are one honking great idea -- let's do more of those!
"""


# One line to start a web server
# python -m SimpleHTTPServer 8080
# python3 -m http.server 8080


# One line to swap two variables
a, b = 1, 2; a, b = b, a


# One line for FizzBuzz: print 1..100, with "Fizz" for multiples of 3, "Buzz" for multiples of 5
# and "FizzBuzz" for multiples of both (the words are now capitalised as this description states)
print(' '.join(["Fizz"[x % 3 * 4:]+"Buzz"[x % 5 * 4:] or str(x) for x in range(1, 101)]))


# One line to draw a heart out of the characters of "Love"
print('\n'.join([''.join([('Love'[(x-y) % len('Love')] if ((x*0.05)**2+(y*0.1)**2-1)**3-(x*0.05)**2*(y*0.1)**3 <= 0 else ' ') for x in range(-30, 30)]) for y in range(30, -30, -1)]))


# One line to draw the Mandelbrot set: every position corresponds to a complex number N=x+y*i
print('\n'.join([''.join(['*'if abs((lambda a: lambda z, c, n: a(a, z, c, n))(lambda s, z, c, n: z if n == 0 else s(s, z*z+c, c, n-1))(0, 0.02*x+0.05j*y, 40)) < 2 else ' ' for x in range(-80, 20)]) for y in range(-20, 20)]))


# One line to print the multiplication table
print('\n'.join([' '.join(['%s*%s=%-2s' % (y, x, x*y) for y in range(1, x+1)]) for x in range(1, 10)]))


# One line to compute the primes between 1 and 100 (two versions)
print(' '.join([str(item) for item in filter(lambda x: not [x % i for i in range(2, x) if x % i == 0], range(2, 101))]))
print(' '.join([str(item) for item in filter(lambda x: all(map(lambda p: x % p != 0, range(2, x))), range(2, 101))]))


# One line to print the Fibonacci sequence
print([x[0] for x in [(a[i][0], a.append([a[i][1], a[i][0]+a[i][1]])) for a in ([[1, 1]], ) for i in range(30)]])


# One line for quicksort (a conditional expression replaces the fragile "and ... or" idiom)
qsort = lambda arr: arr if len(arr) <= 1 else qsort(list(filter(lambda x: x <= arr[0], arr[1:]))) + arr[0:1] + qsort(list(filter(lambda x: x > arr[0], arr[1:])))


# One line to solve the eight queens problem
[__import__('sys').stdout.write('\n'.join('.' * i + 'Q' + '.' * (8-i-1) for i in vec) + "\n========\n") for vec in __import__('itertools').permutations(range(8)) if 8 == len(set(vec[i]+i for i in range(8))) == len(set(vec[i]-i for i in range(8)))]
* (8-i-1) for i in vec) + "\n========\n") for vec in __import__('itertools').permutations(range(8)) if 8 == len(set(vec[i]+i for i in range(8))) == len(set(vec[i]-i for i in range(8)))] 75 | 76 | 77 | # 一行代码实现数组的flatten功能: 将多维数组转化为一维 78 | flatten = lambda x: [y for l in x for y in flatten(l)] if isinstance(x, list) else [x] 79 | 80 | 81 | # 一行代码实现list, 有点类似与上个功能的反功能 82 | array = lambda x: [x[i:i+3] for i in range(0, len(x), 3)] 83 | 84 | 85 | # 一行代码实现求解2的1000次方的各位数之和 86 | print(sum(map(int, str(2**1000)))) 87 | 88 | 89 | # 最后推荐一篇文章: [Python One-liner Games](http://arunrocks.com/python-one-liner-games/) 90 | exit() 91 | -------------------------------------------------------------------------------- /python_requests.py: -------------------------------------------------------------------------------- 1 | # _*_ coding: utf-8 _*_ 2 | 3 | """ 4 | python_requests.py by xianhu 5 | """ 6 | 7 | import requests.adapters 8 | 9 | # 不同方式获取网页内容, 返回一个Response对象, 请求的参数可以为url或Request对象 10 | r0 = requests.get("https://github.com/timeline.json") 11 | r1 = requests.post("http://httpbin.org/post") 12 | r2 = requests.put("http://httpbin.org/put") 13 | r3 = requests.delete("http://httpbin.org/delete") 14 | r4 = requests.head("http://httpbin.org/get") 15 | r5 = requests.options("http://httpbin.org/get") 16 | r6 = requests.patch("http://httpbin.org/get") 17 | 18 | # Request对象: 19 | # class requests.Request(method=None, url=None, headers=None, files=None, data=None, params=None, auth=None, cookies=None, hooks=None, json=None) 20 | 21 | # 上边所有的获取方式都调用底层的request方法, 所以request方法有的参数, 上边几个函数都应该有: 22 | # requests.request(method, url, **kwargs) 23 | # kwargs包括: params / data / json / headers / cookies / files / auth / timeout / allow_redirects(bool) / proxies / verify(bool) / stream / cert 24 | 25 | # Response对象: class requests.Response 26 | # 包含的主要属性: content / cookies / encoding / headers / history / is_permanent_redirect / is_redirect / reason / status_code / text / url 等 27 | # 包含的主要方法: 
iter_content(chunk_size=1, decode_unicode=False) / iter_lines(chunk_size=512, decode_unicode=None, delimiter=None) 28 | # 包含的主要方法: close() / json(**kwargs) / raise_for_status() 等 29 | 30 | # 以字典的形式传递URL参数, 也可以直接以?xx=xx&xx=xx的形式将其放在url后 31 | params = {"key1": "value1", "key2": "value2"} 32 | r = requests.get("http://httpbin.org/get", params=params) 33 | print(r.url) # http://httpbin.org/get?key2=value2&key1=value1 34 | 35 | # 以字典的形式传递URL参数: 字典里带有列表 36 | params = {"key1": "value1", "key2": ["value2", "value3"]} 37 | r = requests.get("http://httpbin.org/get", params=params) 38 | print(r.url) # http://httpbin.org/get?key1=value1&key2=value2&key2=value3 39 | 40 | # 获取网页内容 41 | r = requests.get("https://github.com/timeline.json") 42 | print(r.text) # 返回正常的网页内容, 即解压解码之后的内容 43 | print(r.content) # 返回byte类型的网页内容, 即值解压, 没有解码 44 | print(r.json()) # 如果网页内容为json, 直接返回一个json对象 45 | print(r.encoding) # 返回网页的编码: "utf-8" 46 | 47 | # Requests会自动解码来自服务器的内容, 也可以自己更改 48 | r.encoding = "ISO-8859-1" 49 | print(r.text) # 此时使用新的r.encoding解码后的新值 50 | 51 | # 编码的其他操作 52 | # requests.utils.get_encodings_from_content(content): Returns encodings from given content string. 53 | # requests.utils.get_encoding_from_headers(headers): Returns encodings from given HTTP Header Dict. 54 | # requests.utils.get_unicode_from_response(r): Returns the requested content back in unicode. 55 | 56 | # 原始响应内容: 获取来自服务器的原始套接字响应 57 | r = requests.get("https://github.com/timeline.json", stream=True) 58 | print(r.raw) # <requests.packages.urllib3.response.HTTPResponse object at 0x101194810> 59 | print(r.raw.read(10)) # "\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\x03" 60 | 61 | # 一般情况下, 应该以下面的模式将文本流保存到文件 62 | with open("test", "wb") as fd: 63 | for chunk in r.iter_content(chunk_size=256): 64 | fd.write(chunk) 65 | # 注意: 设置的timeout对connect和read起作用. 
但一旦和服务器建立连接, r.content或r.iter_content就处于一个read的状态, 不受timeout影响 66 | 67 | # 定制请求头: 一个字典 68 | headers = {"user-agent": "my-app/0.0.1"} 69 | r = requests.get("https://api.github.com/some/endpoint", headers=headers) 70 | print(r.request.headers) # 获取request的头部 71 | print(r.headers) # 获取response的头部 72 | # { 73 | # "content-encoding": "gzip", 74 | # "transfer-encoding": "chunked", 75 | # "connection": "close", 76 | # "server": "nginx/1.0.4", 77 | # "x-runtime": "148ms", 78 | # "etag": "e1ca502697e5c9317743dc078f67693f", 79 | # "content-type": "application/json" 80 | # } 81 | print(r.headers["Content-Type"]) # "application/json" 82 | print(r.headers.get("content-type")) # "application/json" 83 | 84 | # 更加复杂的POST请求: 表单 85 | post_dict = {"key1": "value1", "key2": "value2"} 86 | r = requests.post("http://httpbin.org/post", data=post_dict) 87 | print(r.text) 88 | 89 | # POST一个多部分编码(Multipart-Encoded)的文件 90 | files = {"file": open("report.xls", "rb")} 91 | r = requests.post("http://httpbin.org/post", files=files) 92 | print(r.text) 93 | 94 | # 你可以显式地设置文件名, 文件类型和请求头 95 | files = {"file": ("report.xls", open("report.xls", "rb"), "application/vnd.ms-excel", {"Expires": "0"})} 96 | r = requests.post("http://httpbin.org/post", files=files) 97 | print(r.text) 98 | 99 | # 你也可以发送文本字符串 100 | files = {"file": ("report.csv", "some,data,to,send\nanother,row,to,send\n")} 101 | r = requests.post("http://httpbin.org/post", files=files) 102 | print(r.text) 103 | 104 | # 响应状态码 105 | r = requests.get("http://httpbin.org/get") 106 | print(r.status_code) # 200 107 | print(r.status_code == requests.codes.ok) # True 响应状态码查询 108 | 109 | # 如果发送了一个错误请求(4XX客户端错误, 或5XX服务器错误响应), 可以通过 Response.raise_for_status() 来抛出异常: 110 | bad_r = requests.get("http://httpbin.org/status/404") 111 | print(bad_r.status_code) # 404 112 | bad_r.raise_for_status() # 引发异常 113 | 114 | # Cookie: 如果某个响应中包含一些cookie, 则会被放到response.cookies(CookieJar类型)中 115 | r = requests.get("http://example.com/some/cookie/setting/url") 116 | 
print(r.cookies["example_cookie_name"]) # "example_cookie_value" 117 | 118 | # 要想发送你的cookies到服务器, 可以使用cookies参数(一个字典) 119 | cookies = {"cookies_are": "working"} 120 | r = requests.get("http://httpbin.org/cookies", cookies=cookies) 121 | print(r.text) 122 | 123 | # cookie的其他操作 124 | # requests.utils.dict_from_cookiejar(cj): Returns a key/value dictionary from a CookieJar. 125 | # requests.utils.cookiejar_from_dict(cookie_dict, cookiejar=None, overwrite=True): Returns a CookieJar from a key/value dictionary. 126 | # requests.utils.add_dict_to_cookiejar(cj, cookie_dict): Returns a CookieJar from a key/value dictionary. 127 | 128 | # 通用CookieJar类, 一个cookielib.CookieJar, 但是提供一个dict接口 129 | # class requests.cookies.RequestsCookieJar(policy=None): Compatibility class; is a cookielib.CookieJar, but exposes a dict interface. 130 | 131 | # 会话对象: 会话对象让你能够跨请求保持某些参数, 它也会在同一个Session实例发出的所有请求之间保持cookie 132 | s = requests.Session() 133 | s.get("http://httpbin.org/cookies/set/sessioncookie/123456789") 134 | s.get("http://httpbin.org/cookies") 135 | for cookie in s.cookies: 136 | print(cookie) 137 | 138 | # 如果你要手动为会话添加cookie, 就是用Cookie utility函数来操纵Session.cookies 139 | requests.utils.add_dict_to_cookiejar(s.cookies, {"cookie_key": "cookie_value"}) 140 | 141 | # 会话也可用来为请求方法提供缺省数据, 这是通过为会话对象的属性提供数据来实现的 142 | s.auth = ("user", "pass") 143 | s.headers.update({"x-test": "true"}) 144 | s.get("http://httpbin.org/headers", headers={"x-test2": "true"}) # both "x-test" and "x-test2" are sent 145 | 146 | # 不过需要注意, 就算使用了会话, 方法级别的参数也不会被跨请求保持, 下面的例子只会给第一个请求发送cookie 147 | s.get("http://httpbin.org/cookies", cookies={"from-my": "browser"}) # 带有cookie 148 | s.get("http://httpbin.org/cookies") # 不带cookie 149 | 150 | # 会话还可以用作前后文管理器 151 | with requests.Session() as s: 152 | s.get("http://httpbin.org/cookies/set/sessioncookie/123456789") 153 | # class requests.Session类, 和requests外层有的函数/属性基本一致, 只不过是封装了一层跨域请求的功能 154 | 155 | # 重定向与请求历史, 默认情况下, 除了HEAD, Requests会自动处理所有重定向, 可以通过allow_redirects参数禁用重定向处理 156 | 
# 可以使用响应对象的history方法来追踪重定向, Response.history 是一个Response对象的列表, 按照从最老到最近的请求进行排序 157 | r = requests.get("http://github.com", allow_redirects=True) 158 | print(r.status_code) # 200 159 | print(r.history) # [<Response [301]>] 160 | r = requests.get("http://github.com", allow_redirects=False) 161 | print(r.status_code) # 301 162 | print(r.history) # [] 163 | 164 | # 超时, 设置timeout参数 165 | requests.get("http://github.com", timeout=0.001) 166 | # Traceback (most recent call last): 167 | # File "<stdin>", line 1, in <module> 168 | # requests.exceptions.Timeout: HTTPConnectionPool(host="github.com", port=80): Request timed out. (timeout=0.001) 169 | 170 | # 注意: timeout仅对连接过程有效, 与响应体的下载无关 171 | # timeout并不是整个下载响应的时间限制, 而是如果服务器在timeout秒内没有应答, 将会引发一个异常 172 | # 更精确地说, 是在timeout秒内没有从基础套接字上接收到任何字节的数据时 173 | requests.get("https://github.com", timeout=5) 174 | 175 | # 上边的timeout值将会用作 connect 和 read 二者的timeout, 如果要分别制定, 就传入一个元组 176 | requests.get("https://github.com", timeout=(3.05, 27)) 177 | 178 | # 错误与异常: 遇到网络问题(如: DNS 查询失败、拒绝连接等)时, Requests 会抛出一个 ConnectionError 异常 179 | # 如果 HTTP 请求返回了不成功的状态码, Response.raise_for_status() 会抛出一个 HTTPError 异常 180 | # 若请求超时, 则抛出一个 Timeout 异常 181 | # 若请求超过了设定的最大重定向次数, 则会抛出一个 TooManyRedirects 异常 182 | # 所有Requests显式抛出的异常都继承自 requests.exceptions.RequestException 183 | 184 | # 所有异常: 185 | # exception requests.RequestException(*args, **kwargs): There was an ambiguous exception that occurred while handling your request. 186 | # exception requests.ConnectionError(*args, **kwargs): A Connection error occurred. 187 | # exception requests.HTTPError(*args, **kwargs): An HTTP error occurred. 188 | # exception requests.URLRequired(*args, **kwargs): A valid URL is required to make a request. 189 | # exception requests.TooManyRedirects(*args, **kwargs): Too many redirects. 190 | # exception requests.ConnectTimeout(*args, **kwargs): The request timed out while trying to connect to the remote server. 
191 | # exception requests.ReadTimeout(*args, **kwargs): The server did not send any data in the allotted amount of time. 192 | # exception requests.Timeout(*args, **kwargs): The request timed out. 193 | 194 | # SSL证书验证, verify设置为True表示检查证书, 设置为False表示忽略证书 195 | requests.get("https://kennethreitz.com", verify=True) # 未设置SSL证书, 抛出异常 196 | # requests.exceptions.SSLError: hostname "kennethreitz.com" doesn"t match either of "*.herokuapp.com", "herokuapp.com" 197 | requests.get("https://github.com", verify=True) # <Response [200]>, 已设置SSL证书 198 | # 对于私有证书,你也可以传递一个 CA_BUNDLE 文件的路径给 verify 199 | 200 | # 你也可以指定一个本地证书用作客户端证书, 可以是单个文件(包含密钥和证书)或一个包含两个文件路径的元组: 201 | requests.get("https://kennethreitz.com", cert=("/path/server.crt", "/path/key")) 202 | requests.get("https://kennethreitz.com", cert="/wrong_path/server.pem") 203 | # SSLError: [Errno 336265225] _ssl.c:347: error:140B0009:SSL routines:SSL_CTX_use_PrivateKey_file:PEM lib 204 | # 警告: 本地证书的私有 key 必须是解密状态. 目前Requests不支持使用加密的 key 205 | 206 | # 流式上传, 允许你发送大的数据流或文件而无需先把它们读入内存 207 | with open("massive-body") as f: 208 | requests.post("http://some.url/streamed", data=f) 209 | 210 | # 事件挂钩, 可用的钩子: response(从一个请求产生的响应) 211 | # 你可以通过传递一个 {hook_name: callback_function} 字典给 hooks 请求参数为每个请求分配一个钩子函数 212 | def print_url(resp): 213 | print(resp.url) 214 | return 215 | requests.get("http://httpbin.org", hooks=dict(response=print_url)) 216 | 217 | # 代理 218 | proxies = { 219 | "http": "http://10.10.1.10:3128", 220 | "https": "http://10.10.1.10:1080", 221 | } 222 | requests.get("http://example.org", proxies=proxies) 223 | # 若代理需要使用HTTP Basic Auth, 可以使用http://user:password@host:port/, 比如"http": "http://user:pass@10.10.1.10:3128/" 224 | 225 | # 除了基本的 HTTP 代理, Request 还支持 SOCKS 协议的代理, 此时需要单独安装: 226 | # $ pip install requests[socks] 227 | proxies = { 228 | "http": "socks5://user:pass@host:port", 229 | "https": "socks5://user:pass@host:port" 230 | } 231 | requests.get("http://example.org", proxies=proxies) 232 | 233 | # Requests 传输适配器 234 | 
# 从 v1.0.0 以后,Requests 的内部采用了模块化设计。部分原因是为了实现传输适配器(Transport Adapter)。 235 | # 传输适配器提供了一个机制,让你可以为 HTTP 服务定义交互方法。尤其是它允许你应用服务前的配置。 236 | # Requests 自带了一个传输适配器,也就是 HTTPAdapter。 这个适配器使用了强大的 urllib3,为 Requests 提供了默认的 HTTP 和 HTTPS 交互。 237 | # 每当 Session 被初始化,就会有适配器附着在 Session 上,其中一个供 HTTP 使用,另一个供 HTTPS 使用。 238 | # Request 允许用户创建和使用他们自己的传输适配器,实现他们需要的特殊功能。创建好以后,传输适配器可以被加载到一个会话对象上,附带着一个说明,告诉会话适配器应该应用在哪个 web 服务上。 239 | s = requests.Session() 240 | s.mount("http://baidu.com", requests.adapters.HTTPAdapter()) 241 | 242 | # 出现错误: Connection pool is full, discarding connection: xxxx.com 243 | s.mount('https://', requests.adapters.HTTPAdapter(pool_connections=100, pool_maxsize=100)) 244 | 245 | # 关闭InsecurePlatformWarning 246 | # requests.packages.urllib3.disable_warnings() 247 | -------------------------------------------------------------------------------- /python_restful_api.py: -------------------------------------------------------------------------------- 1 | # _*_ coding: utf-8 _*_ 2 | 3 | """ 4 | python_restful_api.py by xianhu 5 | """ 6 | 7 | import sqlalchemy 8 | import sqlalchemy.orm 9 | import sqlalchemy.ext.declarative 10 | from flask import Flask, g 11 | from flask_restful import reqparse, Api, Resource 12 | from flask_httpauth import HTTPTokenAuth 13 | 14 | 15 | # Flask相关变量声明 16 | app = Flask(__name__) 17 | api = Api(app) 18 | 19 | # 认证相关 20 | auth = HTTPTokenAuth(scheme="token") 21 | TOKENS = { 22 | "fejiasdfhu", 23 | "fejiuufjeh" 24 | } 25 | 26 | 27 | @auth.verify_token 28 | def verify_token(token): 29 | if token in TOKENS: 30 | g.current_user = token 31 | return True 32 | return False 33 | 34 | 35 | # 数据库相关变量声明 36 | engine = sqlalchemy.create_engine("mysql+pymysql://username:password@ip/db_name", encoding="utf8", echo=False) 37 | BaseModel = sqlalchemy.ext.declarative.declarative_base() 38 | 39 | 40 | # 构建数据模型User 41 | class User(BaseModel): 42 | __tablename__ = "Users" 43 | __table_args__ = { 44 | "mysql_engine": "InnoDB", 45 | "mysql_charset": "utf8", 46 | } 
47 | 48 | # 表结构,具体更多的数据类型自行百度 49 | id = sqlalchemy.Column("id", sqlalchemy.Integer, primary_key=True, autoincrement=True) 50 | name = sqlalchemy.Column("name", sqlalchemy.String(50), nullable=False) 51 | age = sqlalchemy.Column("age", sqlalchemy.Integer, nullable=False) 52 | 53 | 54 | # 构建数据模型的json格式 55 | def get_json(user): 56 | return {"id": user.id, "name": user.name, "age": user.age} 57 | 58 | 59 | # 利用Session对象连接数据库 60 | DBSessinon = sqlalchemy.orm.sessionmaker(bind=engine) 61 | session = DBSessinon() 62 | BaseModel.metadata.drop_all(engine) 63 | BaseModel.metadata.create_all(engine) 64 | 65 | # RESTfulAPI的参数解析 -- put / post参数解析 66 | parser_put = reqparse.RequestParser() 67 | parser_put.add_argument("name", type=str, required=True, help="need name data") 68 | parser_put.add_argument("age", type=int, required=True, help="need age data") 69 | 70 | # RESTfulAPI的参数解析 -- get参数解析 71 | parser_get = reqparse.RequestParser() 72 | parser_get.add_argument("limit", type=int, required=False) 73 | parser_get.add_argument("offset", type=int, required=False) 74 | parser_get.add_argument("sortby", type=str, required=False) 75 | 76 | 77 | # 操作(put / get / delete)单一资源 78 | class Todo(Resource): 79 | # 添加认证 80 | decorators = [auth.login_required] 81 | 82 | def put(self, user_id): 83 | """ 84 | 更新用户数据: curl http://127.0.0.1:5000/users/1 -X PUT -d "name=Allen&age=20" -H "Authorization: token fejiasdfhu" 85 | """ 86 | args = parser_put.parse_args() 87 | user_ids_set = set([user.id for user in session.query(User.id)]) 88 | print(user_ids_set) 89 | 90 | # 用户不存在,返回404 91 | if user_id not in user_ids_set: 92 | return None, 404 93 | 94 | # 更新用户数据 95 | user = session.query(User).filter(User.id == user_id)[0] 96 | user.name = args["name"] 97 | user.age = args["age"] 98 | session.merge(user) 99 | session.commit() 100 | 101 | # 更新成功,返回201 102 | return get_json(user), 201 103 | 104 | def get(self, user_id): 105 | """ 106 | 获取用户数据: curl http://127.0.0.1:5000/users/1 -X GET -H "Authorization: 
token fejiasdfhu" 107 | """ 108 | users = session.query(User).filter(User.id == user_id) 109 | 110 | # 用户不存在,返回404 111 | if users.count() == 0: 112 | return None, 404 113 | 114 | # 返回用户数据 115 | return get_json(users[0]), 200 116 | 117 | def delete(self, user_id): 118 | """ 119 | 删除用户数据: curl http://127.0.0.1:5000/users/1 -X DELETE -H "Authorization: token fejiasdfhu" 120 | """ 121 | session.query(User).filter(User.id == user_id).delete() 122 | return None, 204 123 | 124 | 125 | # 操作(post / get)资源列表 126 | class TodoList(Resource): 127 | # 添加认证 128 | decorators = [auth.login_required] 129 | 130 | def get(self): 131 | """ 132 | 获取全部用户数据: curl http://127.0.0.1:5000/users -X GET -d "limit=2&offset=0&sortby=name" -H "Authorization: token fejiasdfhu" 133 | """ 134 | args = parser_get.parse_args() 135 | users = session.query(User) 136 | 137 | # 根据条件查询 138 | if "sortby" in args: 139 | users = users.order_by(User.name if args["sortby"] == "name" else User.age) 140 | if "offset" in args: 141 | users = users.offset(args["offset"]) 142 | if "limit" in args: 143 | users = users.limit(args["limit"]) 144 | 145 | # 返回结果 146 | return [get_json(user) for user in users], 200 147 | 148 | def post(self): 149 | """ 150 | 添加一个新用户: curl http://127.0.0.1:5000/users -X POST -d "name=Brown&age=20" -H "Authorization: token fejiasdfhu" 151 | """ 152 | args = parser_put.parse_args() 153 | 154 | # 构建新用户 155 | user = User(name=args["name"], age=args["age"]) 156 | session.add(user) 157 | session.commit() 158 | 159 | # 资源添加成功,返回201 160 | return get_json(user), 201 161 | 162 | 163 | # 设置路由 164 | api.add_resource(TodoList, "/users") 165 | api.add_resource(Todo, "/users/<int:user_id>") 166 | 167 | 168 | if __name__ == "__main__": 169 | app.run(debug=True) 170 | 171 | 172 | """ 常见返回代码 173 | 200 OK - [GET]:服务器成功返回用户请求的数据 174 | 201 CREATED - [POST/PUT/PATCH]:用户新建或修改数据成功 175 | 202 Accepted - [*]:表示一个请求已经进入后台排队(异步任务) 176 | 204 NO CONTENT - [DELETE]:用户删除数据成功 177 | 400 INVALID REQUEST - 
# --------------------------------------------------------------------------------
# /python_spider.py:
# --------------------------------------------------------------------------------
# _*_ coding: utf-8 _*_

"""
python_spider.py by xianhu
"""

import urllib.error
import urllib.parse
import urllib.request
import http.cookiejar

# Variables used by the examples below
url = "https://www.baidu.com"
headers = {"User-Agent": "Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)"}

# The simplest way to fetch a page
response = urllib.request.urlopen(url, timeout=10)
html = response.read().decode("utf-8")


# Use a Request instance instead of the url
request = urllib.request.Request(url, data=None, headers={})
response = urllib.request.urlopen(request, timeout=10)


# Send data, i.e. pass the data argument to Request()
query = urllib.parse.urlencode({"act": "login", "email": "xianhu@qq.com", "password": "123456"})
data = query.encode("utf-8")                                # a POST body must be bytes, not str
request1 = urllib.request.Request(url, data=data)           # POST
request2 = urllib.request.Request(url+"?%s" % query)        # GET
response = urllib.request.urlopen(request1, timeout=10)     # open the request built above, not the stale one


# Send headers, i.e. pass the headers argument to Request()
request = urllib.request.Request(url, data=data, headers=headers)   # headers passed as an argument
request.add_header("Referer", "http://www.baidu.com")               # another way to add a header; Referer counters hot-link protection
response = urllib.request.urlopen(request, timeout=10)


# Fetching can raise urllib.error.HTTPError or urllib.error.URLError.
# HTTPError is a subclass of URLError, so it has to be caught first.
try:
    urllib.request.urlopen(request, timeout=10)
except urllib.error.HTTPError as e:
    print(e.code, e.reason)
except urllib.error.URLError as e:
    print(e.errno, e.reason)


# Use a proxy so the IP is not banned or rate-limited:
proxy_handler = urllib.request.ProxyHandler(proxies={"http": "111.123.76.12:8080"})

opener = urllib.request.build_opener(proxy_handler)     # build an opener that uses the proxy
response = opener.open(url)                             # open the url with the opener directly

urllib.request.install_opener(opener)                   # or install it globally and keep using urlopen
response = urllib.request.urlopen(url)


# Use cookies and a cookiejar to pass server-side checks
cookie_jar = http.cookiejar.CookieJar()
cookie_jar_handler = urllib.request.HTTPCookieProcessor(cookiejar=cookie_jar)
opener = urllib.request.build_opener(cookie_jar_handler)
response = opener.open(url)


# Send a cookie copied from the browser, two ways:
# (1) put it straight into the headers
headers = {
    "User-Agent": "Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)",
    "Cookie": "PHPSESSID=btqkg9amjrtoeev8coq0m78396; USERINFO=n6nxTHTY%2BJA39z6CpNB4eKN8f0KsYLjAQTwPe%2BhLHLruEbjaeh4ulhWAS5RysUM%2B; "
}
request = urllib.request.Request(url, headers=headers)

# (2) build a Cookie and add it to the cookiejar
# Cookie() has no defaults for these arguments, so all of them are spelled out
# (the original "..." placeholder was a syntax error).
cookie = http.cookiejar.Cookie(
    version=0, name="xx", value="xx",
    port=None, port_specified=False,
    domain="xx", domain_specified=True, domain_initial_dot=False,
    path="/", path_specified=True,
    secure=False, expires=None, discard=True,
    comment=None, comment_url=None, rest={},
)
75 | cookie_jar.set_cookie(cookie) 76 | response = opener.open(url) 77 | 78 | 79 | # 同时使用代理和cookiejar 80 | opener = urllib.request.build_opener(cookie_jar_handler) 81 | opener.add_handler(proxy_handler) 82 | response = opener.open("https://www.baidu.com/") 83 | 84 | 85 | # 抓取网页中的图片:同样适用于抓取网络上的文件。右击鼠标,找到图片属性中的地址,然后进行保存。 86 | response = urllib.request.urlopen("http://ww3.sinaimg.cn/large/7d742c99tw1ee7dac2766j204q04qmxq.jpg", timeout=120) 87 | with open("test.jpg", "wb") as file_img: 88 | file_img.write(response.read()) 89 | 90 | 91 | # HTTP认证:即HTTP身份验证 92 | password_mgr = urllib.request.HTTPPasswordMgrWithDefaultRealm() # 创建一个PasswordMgr 93 | password_mgr.add_password(realm=None, uri=url, user='username', passwd='password') # 添加用户名和密码 94 | handler = urllib.request.HTTPBasicAuthHandler(password_mgr) # 创建HTTPBasicAuthHandler 95 | opener = urllib.request.build_opener(handler) # 创建opner 96 | response = opener.open(url, timeout=10) # 获取数据 97 | -------------------------------------------------------------------------------- /python_sqlalchemy.py: -------------------------------------------------------------------------------- 1 | # _*_ coding: utf-8 _*_ 2 | 3 | """ 4 | python_sqlalchemy.py by xianhu 5 | """ 6 | 7 | import sqlalchemy 8 | import sqlalchemy.orm 9 | import sqlalchemy.ext.declarative 10 | 11 | # 利用数据库字符串构造engine, echo为True将打印所有的sql语句, 其他数据库的链接方式可自行百度 12 | # engine = sqlalchemy.create_engine("mysql+pymysql://username:password@hostname/dbname", encoding="utf8", echo=True) 13 | engine = sqlalchemy.create_engine("mysql+pymysql://dba_0:mimadba_0@101.200.174.172/data_secret", encoding="utf8", echo=False) 14 | 15 | """ 16 | # 利用engine创建connection,因为使用了with所以不需要close操作,这部分不是重点 17 | with engine.connect() as conn: 18 | # 最基础的用法 19 | result = conn.execute("select * from tablename limit 10;") 20 | for item in result: 21 | print(item) 22 | 23 | # execute的几种用法,这里具体还是得参考pymysql的用法,不需要执行commit操作 24 | conn.execute("insert into tablename(id, url, title) values(1, 'url1', 
'title1');") 25 | conn.execute("insert into tablename(id, url, title) values(%s, %s, %s);", 2, "url2", "title2") 26 | conn.execute("insert into tablename(id, url, title) values(%s, %s, %s)", (3, "url3", "title3")) 27 | conn.execute("insert into tablename(id, url, title) values(%s, %s, %s)", [(31, "url31", "title31"), (32, "url32", "title32")]) 28 | 29 | # 使用事务可以进行批量提交和回滚 30 | trans = conn.begin() 31 | try: 32 | conn.execute("insert into tablename(id, url, title) values(%s, %s, %s)", [(4, "url4", "title4"), (5, "url5", "title5")]) 33 | trans.commit() 34 | except Exception as excep: 35 | trans.rollback() 36 | raise 37 | trans.close() 38 | """ 39 | 40 | # 首先需要生成一个BaseModel类,作为所有模型类的基类 41 | BaseModel = sqlalchemy.ext.declarative.declarative_base() 42 | 43 | 44 | # 构建数据模型User 45 | class User(BaseModel): 46 | __tablename__ = "Users" # 表名 47 | __table_args__ = { 48 | "mysql_engine": "InnoDB", # 表的引擎 49 | "mysql_charset": "utf8", # 表的编码格式 50 | } 51 | 52 | # 表结构,具体更多的数据类型自行百度 53 | id = sqlalchemy.Column("id", sqlalchemy.Integer, primary_key=True, autoincrement=True) 54 | name = sqlalchemy.Column("name", sqlalchemy.String(50), nullable=False) 55 | age = sqlalchemy.Column("age", sqlalchemy.Integer, default=0) 56 | 57 | # 添加角色id外键,关联到表Roles的id属性 58 | role_id = sqlalchemy.Column("role_id", sqlalchemy.Integer, sqlalchemy.ForeignKey("Roles.id")) 59 | 60 | # 添加关系属性,关联到本实例的role_id外键属性上 61 | role = sqlalchemy.orm.relationship("Role", foreign_keys="User.role_id") 62 | 63 | # 添加关系属性,关联到本实例的role_id外键属性上,如果使用了这种方式,Role模型中的users可以省略 64 | # role = sqlalchemy.orm.relationship("Role", foreign_keys="User.role_id", backref=sqlalchemy.orm.backref("users")) 65 | 66 | 67 | # 构建数据模型Role 68 | class Role(BaseModel): 69 | __tablename__ = "Roles" # 表名 70 | __table_args__ = { 71 | "mysql_engine": "InnoDB", # 表的引擎 72 | "mysql_charset": "utf8", # 表的编码格式 73 | } 74 | 75 | # 表结构,具体更多的数据类型自行百度 76 | id = sqlalchemy.Column("id", sqlalchemy.Integer, primary_key=True) 77 | name = sqlalchemy.Column("name", 
sqlalchemy.String(50), unique=True) 78 | 79 | # 添加关系属性,关联到实例User的role_id外键属性上 80 | users = sqlalchemy.orm.relationship("User", foreign_keys="User.role_id") 81 | 82 | 83 | # 利用Session对象连接数据库 84 | DBSessinon = sqlalchemy.orm.sessionmaker(bind=engine) # 创建会话类 85 | session = DBSessinon() # 创建会话对象 86 | 87 | 88 | # 删除所有表 89 | BaseModel.metadata.drop_all(engine) 90 | # 创建所有表,如果表已经存在,则不会创建 91 | BaseModel.metadata.create_all(engine) 92 | 93 | try: 94 | # 清空数据,不需要commit操作 95 | session.query(User).filter(User.id != -1).delete() 96 | session.query(Role).filter(Role.id != -1).delete() 97 | # 删除数据的另外一种形式:session.delete() 98 | 99 | # 插入数据,这里的一个实例只插入一次,第二次插入不生效 100 | session.add(Role(id=1, name="student")) 101 | session.add(Role(id=2, name="teacher")) 102 | session.commit() 103 | 104 | session.add(User(name="James", age=20, role_id=1)) 105 | session.add(User(name="Wade", age=40, role_id=2)) 106 | session.commit() 107 | 108 | user = User(name="Kobe", age=24, role_id=1) 109 | session.add(user) 110 | session.commit() 111 | 112 | # 修改数据 113 | user.name = "Allen" 114 | session.merge(user) # 使用merge方法,如果存在则修改,如果不存在则插入 115 | session.query(User).filter(User.id == user.id).update({User.name: "Allen"}) # 使用update方法 116 | session.query(User).filter(User.id == user.id).update({User.age: User.age + 1}) # 使用update方法,自增操作 117 | 118 | # 查询数据 119 | roles = session.query(Role) # 返回全部结果 120 | for role in roles: 121 | print("Role:", role.id, role.name) 122 | 123 | users = session.query(User) # 返回全部结果 124 | for user in users: 125 | print("User:", user.id, user.name, user.age, user.role_id) 126 | 127 | # 其他获取数据的方式 128 | print("get(id):", session.query(User).get(1)) # 返回结果集中id为1的项 129 | print("get[1:3]:", session.query(User)[1:3]) # 返回结果集中的第2-3项 130 | 131 | # 其他高级查询,这里以Users表为例 132 | users = session.query(User).filter(User.id > 6) # 条件查询 133 | users = session.query(User).filter(User.id > 6).all() # 条件查询,返回查询的全部数据 134 | user = session.query(User).filter(User.id > 6).first() # 条件查询,返回查询数据的第一项 135 | users 
= session.query(User).filter(User.id > 6).limit(10) # 条件查询,返回最多10条数据 136 | users = session.query(User).filter(User.id > 6).offset(2) # 条件查询,从第3条数据开始返回 137 | 138 | users = session.query(User).filter(User.id > 6, User.name == "Kobe") # 条件查询,and操作 139 | users = session.query(User).filter(User.id > 6).filter(User.name == "Kobe") # 条件查询,and操作 140 | users = session.query(User).filter(sqlalchemy.or_(User.id > 6, User.name == "Kobe")) # 条件查询,or操作 141 | users = session.query(User).filter(User.id.in_((1, 2))) # 条件查询,in操作 142 | users = session.query(User).filter(sqlalchemy.not_(User.name)) # 条件查询,not操作 143 | 144 | user_count = session.query(User.id).count() # 统计全部user的数量 145 | user_count = session.query(sqlalchemy.func.count(User.id)).scalar() # scalar操作返回第一行数据的第一个字段 146 | session.query(sqlalchemy.func.count("*")).select_from(User).scalar() # scalar操作返回第一行数据的第一个字段 147 | session.query(sqlalchemy.func.count(1)).select_from(User).scalar() # scalar操作返回第一行数据的第一个字段 148 | session.query(sqlalchemy.func.count(User.id)).filter(User.id > 0).scalar() # filter() 中包含 User,因此不需要指定表 149 | 150 | session.query(sqlalchemy.func.sum(User.age)).scalar() # 求和运算,运用scalar函数 151 | session.query(sqlalchemy.func.avg(User.age)).scalar() # 求均值运算,运用scalar函数 152 | session.query(sqlalchemy.func.md5(User.name)).filter(User.id == 1).scalar() # 运用md5函数 153 | 154 | users = session.query(sqlalchemy.distinct(User.name)) # 去重查询,根据name进行去重 155 | users = session.query(User).order_by(User.name) # 排序查询,正序查询 156 | users = session.query(User).order_by(User.name.desc()) # 排序查询,倒序查询 157 | users = session.query(User).order_by(sqlalchemy.desc(User.name)) # 排序查询,倒序查询的另外一种形式 158 | 159 | users = session.query(User.id, User.name) # 只查询部分属性 160 | users = session.query(User.name.label("user_name")) # 结果集的列取别名 161 | for user in users: 162 | print("label test:", user.user_name) # 这里使用别名 163 | 164 | users = session.query(sqlalchemy.func.count(User.name).label("count"), User.age).group_by(User.age) # 分组查询 165 | for user in users: 166 
| print("age:{0}, count:{1}".format(user.age, user.count)) 167 | 168 | # 多表查询 169 | result = session.query(User, Role).filter(User.role_id == Role.id) 170 | for user, role in result: 171 | print("user %s's role is %s" % (user.name, role.name)) 172 | users = session.query(User).join(Role, User.role_id == Role.id) 173 | for user in users: 174 | print("user join, name:", user.name) 175 | 176 | # 关联属性的用法 177 | roles = session.query(Role) 178 | for role in roles: 179 | print("role:%s users:" % role.name) 180 | for user in role.users: 181 | print("\t%s" % user.name) 182 | users = session.query(User) 183 | for user in users: 184 | print("user %s's role is %s" % (user.name, user.role.name)) 185 | 186 | except Exception as excep: 187 | session.rollback() 188 | raise 189 | 190 | session.close() 191 | -------------------------------------------------------------------------------- /python_thread_multiprocess.py: -------------------------------------------------------------------------------- 1 | # _*_ coding: utf-8 _*_ 2 | 3 | """ 4 | python_thread_multiprocee.py by xianhu 5 | """ 6 | 7 | import time 8 | import threading 9 | import multiprocessing 10 | 11 | # 定义全局变量Queue 12 | g_queue = multiprocessing.Queue() 13 | g_search_list = list(range(10000)) 14 | 15 | 16 | # 定义一个IO密集型任务:利用time.sleep() 17 | def task_io(task_id): 18 | print("IOTask[%s] start" % task_id) 19 | while not g_queue.empty(): 20 | time.sleep(1) 21 | try: 22 | data = g_queue.get(block=True, timeout=1) 23 | print("IOTask[%s] get data: %s" % (task_id, data)) 24 | except Exception as excep: 25 | print("IOTask[%s] error: %s" % (task_id, str(excep))) 26 | print("IOTask[%s] end" % task_id) 27 | return 28 | 29 | 30 | # 定义一个计算密集型任务:利用一些复杂加减乘除、列表查找等 31 | def task_cpu(task_id): 32 | print("CPUTask[%s] start" % task_id) 33 | while not g_queue.empty(): 34 | count = 0 35 | for i in range(10000): 36 | count += pow(3*2, 3*2) if i in g_search_list else 0 37 | try: 38 | data = g_queue.get(block=True, timeout=1) 39 | 
print("CPUTask[%s] get data: %s" % (task_id, data)) 40 | except Exception as excep: 41 | print("CPUTask[%s] error: %s" % (task_id, str(excep))) 42 | print("CPUTask[%s] end" % task_id) 43 | return task_id 44 | 45 | 46 | def init_queue(): 47 | print("init g_queue start") 48 | while not g_queue.empty(): 49 | g_queue.get() 50 | for _index in range(10): 51 | g_queue.put(_index) 52 | print("init g_queue end") 53 | return 54 | 55 | 56 | if __name__ == '__main__': 57 | print("cpu count:", multiprocessing.cpu_count(), "\n") 58 | 59 | print("========== 直接执行IO密集型任务 ==========") 60 | init_queue() 61 | time_0 = time.time() 62 | task_io(0) 63 | print("结束:", time.time() - time_0, "\n") 64 | 65 | print("========== 多线程执行IO密集型任务 ==========") 66 | init_queue() 67 | time_0 = time.time() 68 | thread_list = [threading.Thread(target=task_io, args=(i,)) for i in range(5)] 69 | for t in thread_list: 70 | t.start() 71 | for t in thread_list: 72 | if t.is_alive(): 73 | t.join() 74 | print("结束:", time.time() - time_0, "\n") 75 | 76 | print("========== 多进程执行IO密集型任务 ==========") 77 | init_queue() 78 | time_0 = time.time() 79 | process_list = [multiprocessing.Process(target=task_io, args=(i,)) for i in range(multiprocessing.cpu_count())] 80 | for p in process_list: 81 | p.start() 82 | for p in process_list: 83 | if p.is_alive(): 84 | p.join() 85 | print("结束:", time.time() - time_0, "\n") 86 | 87 | print("========== 直接执行CPU密集型任务 ==========") 88 | init_queue() 89 | time_0 = time.time() 90 | task_cpu(0) 91 | print("结束:", time.time() - time_0, "\n") 92 | 93 | print("========== 多线程执行CPU密集型任务 ==========") 94 | init_queue() 95 | time_0 = time.time() 96 | thread_list = [threading.Thread(target=task_cpu, args=(i,)) for i in range(5)] 97 | for t in thread_list: 98 | t.start() 99 | for t in thread_list: 100 | if t.is_alive(): 101 | t.join() 102 | print("结束:", time.time() - time_0, "\n") 103 | 104 | print("========== 多进程执行cpu密集型任务 ==========") 105 | init_queue() 106 | time_0 = time.time() 107 | process_list 
= [multiprocessing.Process(target=task_cpu, args=(i,)) for i in range(multiprocessing.cpu_count())] 108 | for p in process_list: 109 | p.start() 110 | for p in process_list: 111 | if p.is_alive(): 112 | p.join() 113 | print("结束:", time.time() - time_0, "\n") 114 | 115 | exit() 116 | -------------------------------------------------------------------------------- /python_version36.py: -------------------------------------------------------------------------------- 1 | # _*_ coding: utf-8 _*_ 2 | 3 | """ 4 | python_version36.py by xianhu 5 | """ 6 | 7 | import asyncio 8 | import decimal 9 | from typing import List, Dict 10 | 11 | # Formatted string literals 12 | name = "Fred" 13 | print(f"He said his name is {name}.") # 'He said his name is Fred.' 14 | print("He said his name is {name}.".format(**locals())) 15 | 16 | width = 10 17 | precision = 4 18 | value = decimal.Decimal("12.34567") 19 | print(f"result: {value:{width}.{precision}}") #'result: 12.35' 20 | 21 | 22 | # variable annotations 23 | def test(a: List[int], b: int) -> int: 24 | return a[0] + b 25 | print(test([3, 1], 2)) 26 | 27 | primes: List[int] = [] 28 | captain: str 29 | 30 | class Starship(object): 31 | stats: Dict[str, int] = {} 32 | 33 | 34 | # Underscores in Numeric Literals 35 | a = 1_000_000_000_000_000 # 1000000000000000 36 | b = 0x_FF_FF_FF_FF # 4294967295 37 | 38 | '{:_}'.format(1000000) # '1_000_000' 39 | '{:_x}'.format(0xFFFFFFFF) # 'ffff_ffff' 40 | 41 | 42 | # Asynchronous Generators 43 | async def ticker(delay, to): 44 | """Yield numbers from 0 to *to* every *delay* seconds.""" 45 | for i in range(to): 46 | yield i 47 | await asyncio.sleep(delay) 48 | 49 | 50 | # Asynchronous Comprehensions 51 | result = [i async for i in aiter() if i % 2] 52 | result = [await fun() for fun in funcs if await condition()] 53 | -------------------------------------------------------------------------------- /python_visual.py: -------------------------------------------------------------------------------- 1 
| # _*_ coding: utf-8 _*_ 2 | 3 | """ 4 | python_visual.py by xianhu 5 | """ 6 | 7 | import numpy as np 8 | import matplotlib 9 | import matplotlib.mlab as mlab 10 | import matplotlib.pyplot as plt 11 | import matplotlib.font_manager as fm 12 | from mpl_toolkits.mplot3d import Axes3D 13 | 14 | # 解决中文乱码问题 15 | myfont = fm.FontProperties(fname="/Library/Fonts/Songti.ttc", size=14) 16 | matplotlib.rcParams["axes.unicode_minus"] = False 17 | 18 | 19 | def simple_plot(): 20 | """ 21 | simple plot 22 | """ 23 | # 生成测试数据 24 | x = np.linspace(-np.pi, np.pi, 256, endpoint=True) 25 | y_cos, y_sin = np.cos(x), np.sin(x) 26 | 27 | # 生成画布,并设定标题 28 | plt.figure(figsize=(8, 6), dpi=80) 29 | plt.title("简单曲线图", fontproperties=myfont) 30 | plt.grid(True) 31 | 32 | # 设置X轴 33 | plt.xlabel("X轴", fontproperties=myfont) 34 | plt.xlim(-4.0, 4.0) 35 | plt.xticks(np.linspace(-4, 4, 9, endpoint=True)) 36 | 37 | # 设置Y轴 38 | plt.ylabel("Y轴", fontproperties=myfont) 39 | plt.ylim(-1.0, 1.0) 40 | plt.yticks(np.linspace(-1, 1, 9, endpoint=True)) 41 | 42 | # 画两条曲线 43 | plt.plot(x, y_cos, "b--", linewidth=2.0, label="cos示例") 44 | plt.plot(x, y_sin, "g-", linewidth=2.0, label="sin示例") 45 | 46 | # 设置图例位置,loc可以为[upper, lower, left, right, center] 47 | plt.legend(loc="upper left", prop=myfont, shadow=True) 48 | 49 | # 图形显示 50 | plt.show() 51 | return 52 | # simple_plot() 53 | 54 | 55 | def simple_advanced_plot(): 56 | """ 57 | simple advanced plot 58 | """ 59 | # 生成测试数据 60 | x = np.linspace(-np.pi, np.pi, 256, endpoint=True) 61 | y_cos, y_sin = np.cos(x), np.sin(x) 62 | 63 | # 生成画布, 并设定标题 64 | plt.figure(figsize=(8, 6), dpi=80) 65 | plt.title("复杂曲线图", fontproperties=myfont) 66 | plt.grid(True) 67 | 68 | # 画图的另外一种方式 69 | ax_1 = plt.subplot(111) 70 | ax_1.plot(x, y_cos, color="blue", linewidth=2.0, linestyle="--", label="左cos") 71 | ax_1.legend(loc="upper left", prop=myfont, shadow=True) 72 | 73 | # 设置Y轴(左边) 74 | ax_1.set_ylabel("左cos的y轴", fontproperties=myfont) 75 | ax_1.set_ylim(-1.0, 1.0) 76 | 
ax_1.set_yticks(np.linspace(-1, 1, 9, endpoint=True)) 77 | 78 | # 画图的另外一种方式 79 | ax_2 = ax_1.twinx() 80 | ax_2.plot(x, y_sin, color="green", linewidth=2.0, linestyle="-", label="右sin") 81 | ax_2.legend(loc="upper right", prop=myfont, shadow=True) 82 | 83 | # 设置Y轴(右边) 84 | ax_2.set_ylabel("右sin的y轴", fontproperties=myfont) 85 | ax_2.set_ylim(-2.0, 2.0) 86 | ax_2.set_yticks(np.linspace(-2, 2, 9, endpoint=True)) 87 | 88 | # 设置X轴(共同) 89 | ax_1.set_xlabel("x轴", fontproperties=myfont) 90 | ax_1.set_xlim(-4.0, 4.0) 91 | ax_1.set_xticks(np.linspace(-4, 4, 9, endpoint=True)) 92 | 93 | # 图形显示 94 | plt.show() 95 | return 96 | # simple_advanced_plot() 97 | 98 | 99 | def subplot_plot(): 100 | """ 101 | subplot plot 102 | """ 103 | # 子图的style列表 104 | style_list = ["g+-", "r*-", "b.-", "yo-"] 105 | 106 | # 依次画图 107 | for num in range(4): 108 | # 生成测试数据 109 | x = np.linspace(0.0, 2+num, num=10*(num+1)) 110 | y = np.sin((5-num) * np.pi * x) 111 | 112 | # 子图的生成方式 113 | plt.subplot(2, 2, num+1) 114 | plt.title("子图 %d" % (num+1), fontproperties=myfont) 115 | plt.plot(x, y, style_list[num]) 116 | 117 | # 图形显示 118 | plt.show() 119 | return 120 | # subplot_plot() 121 | 122 | 123 | def bar_plot(): 124 | """ 125 | bar plot 126 | """ 127 | # 生成测试数据 128 | means_men = (20, 35, 30, 35, 27) 129 | means_women = (25, 32, 34, 20, 25) 130 | 131 | # 设置标题 132 | plt.title("柱状图", fontproperties=myfont) 133 | 134 | # 设置相关参数 135 | index = np.arange(len(means_men)) 136 | bar_width = 0.35 137 | 138 | # 画柱状图 139 | plt.bar(index, means_men, width=bar_width, alpha=0.2, color="b", label="男生") 140 | plt.bar(index+bar_width, means_women, width=bar_width, alpha=0.8, color="r", label="女生") 141 | plt.legend(loc="upper right", prop=myfont, shadow=True) 142 | 143 | # 设置柱状图标示 144 | for x, y in zip(index, means_men): 145 | plt.text(x, y+0.3, y, ha="center", va="bottom") 146 | for x, y in zip(index, means_women): 147 | plt.text(x+bar_width, y+0.3, y, ha="center", va="bottom") 148 | 149 | # 设置刻度范围/坐标轴名称等 150 | plt.ylim(0, 
45) 151 | plt.xlabel("分组Group", fontproperties=myfont) 152 | plt.ylabel("得分Scores", fontproperties=myfont) 153 | plt.xticks(index+(bar_width/2), ("A组", "B组", "C组", "D组", "E组"), fontproperties=myfont) 154 | 155 | # 图形显示 156 | plt.show() 157 | return 158 | # bar_plot() 159 | 160 | 161 | def barh_plot(): 162 | """ 163 | barh plot 164 | """ 165 | # 生成测试数据 166 | means_men = (20, 35, 30, 35, 27) 167 | means_women = (25, 32, 34, 20, 25) 168 | 169 | # 设置标题 170 | plt.title("横向柱状图", fontproperties=myfont) 171 | 172 | # 设置相关参数 173 | index = np.arange(len(means_men)) 174 | bar_height = 0.35 175 | 176 | # 画柱状图(水平方向) 177 | plt.barh(index, means_men, height=bar_height, alpha=0.2, color="b", label="Men") 178 | plt.barh(index+bar_height, means_women, height=bar_height, alpha=0.8, color="r", label="Women") 179 | plt.legend(loc="upper right", shadow=True) 180 | 181 | # 设置柱状图标示 182 | for x, y in zip(index, means_men): 183 | plt.text(y+0.3, x, y, ha="left", va="center") 184 | for x, y in zip(index, means_women): 185 | plt.text(y+0.3, x+bar_height, y, ha="left", va="center") 186 | 187 | # 设置刻度范围/坐标轴名称等 188 | plt.xlim(0, 45) 189 | plt.xlabel("Scores") 190 | plt.ylabel("Group") 191 | plt.yticks(index+(bar_height/2), ("A", "B", "C", "D", "E")) 192 | 193 | # 图形显示 194 | plt.show() 195 | return 196 | # barh_plot() 197 | 198 | 199 | def bar_advanced_plot(): 200 | """ 201 | bar advanced plot 202 | """ 203 | # 生成测试数据 204 | means_men = np.array((20, 35, 30, 35, 27, 25, 32, 34, 20, 25)) 205 | means_women = np.array((25, 32, 34, 20, 25, 20, 35, 30, 35, 27)) 206 | 207 | # 设置标题 208 | plt.title("高级柱状图", fontproperties=myfont) 209 | 210 | # 设置相关参数 211 | index = np.arange(len(means_men)) 212 | bar_width = 0.8 213 | 214 | # 画柱状图(两种:X轴以上/X轴以下) 215 | plt.bar(index, means_men, width=bar_width, alpha=0.4, color="b", label="Men") 216 | plt.bar(index, -means_women, width=bar_width, alpha=0.4, color="r", label="Women") 217 | 218 | # 画折线图(两种,和柱状图对应) 219 | plt.plot(index, means_men, marker="o", linestyle="-", 
color="r", label="Men line") 220 | plt.plot(index, -means_women, marker=".", linestyle="--", color="b", label="Women line") 221 | 222 | # 设置图形标示(两种,和柱状图对应) 223 | for x, y in zip(index, means_men): 224 | plt.text(x, y+1, y, ha="center", va="bottom") 225 | for x, y in zip(index, means_women): 226 | plt.text(x, -y-1, y, ha="center", va="top") 227 | 228 | # 设置Y轴和图例位置 229 | plt.ylim(-45, 80) 230 | plt.legend(loc="upper left", shadow=True) 231 | 232 | # 图形显示 233 | plt.show() 234 | return 235 | # bar_advanced_plot() 236 | 237 | 238 | def table_plot(): 239 | """ 240 | table plot 241 | """ 242 | # 生成测试数据 243 | data = np.array([ 244 | [1, 4, 2, 5, 2], 245 | [2, 1, 1, 3, 6], 246 | [5, 3, 6, 4, 1] 247 | ]) 248 | 249 | # 设置标题 250 | plt.title("层次柱状图", fontproperties=myfont) 251 | 252 | # 设置相关参数 253 | index = np.arange(len(data[0])) 254 | color_index = ["r", "g", "b"] 255 | 256 | # 声明底部位置 257 | bottom = np.array([0, 0, 0, 0, 0]) 258 | 259 | # 依次画图,并更新底部位置 260 | for i in range(len(data)): 261 | plt.bar(index, data[i], width=0.5, color=color_index[i], bottom=bottom, alpha=0.7, label="标签 %d" % i) 262 | bottom += data[i] 263 | 264 | # 设置图例位置 265 | plt.legend(loc="upper left", prop=myfont, shadow=True) 266 | 267 | # 图形显示 268 | plt.show() 269 | return 270 | # table_plot() 271 | 272 | 273 | def histograms_plot(): 274 | """ 275 | histograms plot 276 | """ 277 | # 生成测试数据 278 | mu, sigma = 100, 15 279 | x = mu + sigma * np.random.randn(10000) 280 | 281 | # 设置标题 282 | plt.title("直方图", fontproperties=myfont) 283 | 284 | # 画直方图, 并返回相关结果 285 | n, bins, patches = plt.hist(x, bins=50, normed=1, cumulative=False, color="green", alpha=0.6, label="直方图") 286 | 287 | # 根据直方图返回的结果, 画折线图 288 | y = mlab.normpdf(bins, mu, sigma) 289 | plt.plot(bins, y, "r--", label="线条") 290 | 291 | # 设置图例位置 292 | plt.legend(loc="upper left", prop=myfont, shadow=True) 293 | 294 | # 图形显示 295 | plt.show() 296 | return 297 | # histograms_plot() 298 | 299 | 300 | def pie_plot(): 301 | """ 302 | pie plot 303 | """ 304 | # 
生成测试数据 305 | sizes = [15, 30, 45, 10] 306 | labels = ["Frogs", "中文", "Dogs", "Logs"] 307 | colors = ["yellowgreen", "gold", "lightskyblue", "lightcoral"] 308 | 309 | # 设置标题 310 | plt.title("饼图", fontproperties=myfont) 311 | 312 | # 设置突出参数 313 | explode = [0, 0.05, 0, 0] 314 | 315 | # 画饼状图 316 | patches, l_text, p_text = plt.pie(sizes, explode=explode, labels=labels, colors=colors, autopct="%1.1f%%", shadow=True, startangle=90) 317 | for text in l_text: 318 | text.set_fontproperties(myfont) 319 | plt.axis("equal") 320 | 321 | # 图形显示 322 | plt.show() 323 | return 324 | # pie_plot() 325 | 326 | 327 | def scatter_plot(): 328 | """ 329 | scatter plot 330 | """ 331 | # 生成测试数据 332 | point_count = 1000 333 | x_index = np.random.random(point_count) 334 | y_index = np.random.random(point_count) 335 | 336 | # 设置标题 337 | plt.title("散点图", fontproperties=myfont) 338 | 339 | # 设置相关参数 340 | color_list = np.random.random(point_count) 341 | scale_list = np.random.random(point_count) * 100 342 | 343 | # 画散点图 344 | plt.scatter(x_index, y_index, s=scale_list, c=color_list, marker="o") 345 | 346 | # 图形显示 347 | plt.show() 348 | return 349 | # scatter_plot() 350 | 351 | 352 | def fill_plot(): 353 | """ 354 | fill plot 355 | """ 356 | # 生成测试数据 357 | x = np.linspace(-2*np.pi, 2*np.pi, 1000, endpoint=True) 358 | y = np.sin(x) 359 | 360 | # 设置标题 361 | plt.title("填充图", fontproperties=myfont) 362 | 363 | # 画图 364 | plt.plot(x, y, color="blue", alpha=1.00) 365 | 366 | # 填充图形, plt.fill_between(x, y1, y2, where=None, *kwargs) 367 | plt.fill_between(x, 0, y, where=(y > 0), color="blue", alpha=0.25) 368 | plt.fill_between(x, 0, y, where=(y < 0), color="red", alpha=0.25) 369 | 370 | # 图形显示 371 | plt.show() 372 | return 373 | # fill_plot() 374 | 375 | 376 | def radar_plot(): 377 | """ 378 | radar plot 379 | """ 380 | # 生成测试数据 381 | labels = np.array(["A组", "B组", "C组", "D组", "E组", "F组"]) 382 | data = np.array([68, 83, 90, 77, 89, 73]) 383 | theta = np.linspace(0, 2*np.pi, len(data), endpoint=False) 384 
| 385 | # 数据预处理 386 | data = np.concatenate((data, [data[0]])) 387 | theta = np.concatenate((theta, [theta[0]])) 388 | 389 | # 画图方式 390 | plt.subplot(111, polar=True) 391 | plt.title("雷达图", fontproperties=myfont) 392 | 393 | # 设置"theta grid"/"radar grid" 394 | plt.thetagrids(theta*(180/np.pi), labels=labels, fontproperties=myfont) 395 | plt.rgrids(np.arange(20, 100, 20), labels=np.arange(20, 100, 20), angle=0) 396 | plt.ylim(0, 100) 397 | 398 | # 画雷达图,并填充雷达图内部区域 399 | plt.plot(theta, data, "bo-", linewidth=2) 400 | plt.fill(theta, data, color="red", alpha=0.25) 401 | 402 | # 图形显示 403 | plt.show() 404 | return 405 | # radar_plot() 406 | 407 | 408 | def three_dimension_scatter(): 409 | """ 410 | 3d scatter plot 411 | """ 412 | # 生成测试数据 413 | x = np.random.random(100) 414 | y = np.random.random(100) 415 | z = np.random.random(100) 416 | color = np.random.random(100) 417 | scale = np.random.random(100) * 100 418 | 419 | # 生成画布(两种形式) 420 | fig = plt.figure() 421 | fig.suptitle("三维散点图", fontproperties=myfont) 422 | 423 | # ax = fig.gca(projection="3d") 424 | ax = fig.add_subplot(111, projection="3d") 425 | 426 | # 画三维散点图 427 | ax.scatter(x, y, z, s=scale, c=color, marker=".") 428 | 429 | # 设置坐标轴图标 430 | ax.set_xlabel("X Label") 431 | ax.set_ylabel("Y Label") 432 | ax.set_zlabel("Z Label") 433 | 434 | # 设置坐标轴范围 435 | ax.set_xlim(0, 1) 436 | ax.set_ylim(0, 1) 437 | ax.set_zlim(0, 1) 438 | 439 | # 图形显示 440 | plt.show() 441 | return 442 | # three_dimension_scatter() 443 | 444 | 445 | def three_dimension_line(): 446 | """ 447 | 3d line plot 448 | """ 449 | # 生成测试数据 450 | x = np.linspace(0, 1, 1000) 451 | y = np.linspace(0, 1, 1000) 452 | z = np.sin(x * 2 * np.pi) / (y + 0.1) 453 | 454 | # 生成画布(两种形式) 455 | fig = plt.figure() 456 | ax = fig.gca(projection="3d", title="plot title") 457 | # ax = fig.add_subplot(111, projection="3d", title="plot title") 458 | 459 | # 画三维折线图 460 | ax.plot(x, y, z, color="red", linestyle="-") 461 | 462 | # 设置坐标轴图标 463 | ax.set_xlabel("X Label") 464 
| ax.set_ylabel("Y Label") 465 | ax.set_zlabel("Z Label") 466 | 467 | # 图形显示 468 | plt.show() 469 | return 470 | # three_dimension_line() 471 | 472 | 473 | def three_dimension_bar(): 474 | """ 475 | 3d bar plot 476 | """ 477 | # 生成测试数据(位置数据) 478 | xpos = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] 479 | ypos = [2, 3, 4, 5, 1, 6, 2, 1, 7, 2] 480 | zpos = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0] 481 | 482 | # 生成测试数据(柱形参数) 483 | dx = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1] 484 | dy = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1] 485 | dz = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] 486 | 487 | # 生成画布(两种形式) 488 | fig = plt.figure() 489 | ax = fig.gca(projection="3d", title="plot title") 490 | 491 | # 画三维柱状图 492 | ax.bar3d(xpos, ypos, zpos, dx, dy, dz, alpha=0.5) 493 | 494 | # 设置坐标轴图标 495 | ax.set_xlabel("X Label") 496 | ax.set_ylabel("Y Label") 497 | ax.set_zlabel("Z Label") 498 | 499 | # 图形显示 500 | plt.show() 501 | return 502 | # three_dimension_bar() 503 | -------------------------------------------------------------------------------- /python_visual_animation.py: -------------------------------------------------------------------------------- 1 | # _*_ coding: utf-8 _*_ 2 | 3 | """ 4 | python_visual_animation.py by xianhu 5 | """ 6 | 7 | import numpy as np 8 | import matplotlib 9 | import matplotlib.pyplot as plt 10 | import matplotlib.font_manager as fm 11 | from mpl_toolkits.mplot3d import Axes3D 12 | 13 | # 解决中文乱码问题 14 | myfont = fm.FontProperties(fname="/Library/Fonts/Songti.ttc", size=14) 15 | matplotlib.rcParams["axes.unicode_minus"] = False 16 | 17 | 18 | def simple_plot(): 19 | """ 20 | simple plot 21 | """ 22 | # 生成画布 23 | plt.figure(figsize=(8, 6), dpi=80) 24 | 25 | # 打开交互模式 26 | plt.ion() 27 | 28 | # 循环 29 | for index in range(100): 30 | # 清除原有图像 31 | plt.cla() 32 | 33 | # 设定标题等 34 | plt.title("动态曲线图", fontproperties=myfont) 35 | plt.grid(True) 36 | 37 | # 生成测试数据 38 | x = np.linspace(-np.pi + 0.1*index, np.pi+0.1*index, 256, endpoint=True) 39 | y_cos, y_sin = np.cos(x), np.sin(x) 40 | 41 | # 设置X轴 42 | 
plt.xlabel("X轴", fontproperties=myfont) 43 | plt.xlim(-4 + 0.1*index, 4 + 0.1*index) 44 | plt.xticks(np.linspace(-4 + 0.1*index, 4+0.1*index, 9, endpoint=True)) 45 | 46 | # 设置Y轴 47 | plt.ylabel("Y轴", fontproperties=myfont) 48 | plt.ylim(-1.0, 1.0) 49 | plt.yticks(np.linspace(-1, 1, 9, endpoint=True)) 50 | 51 | # 画两条曲线 52 | plt.plot(x, y_cos, "b--", linewidth=2.0, label="cos示例") 53 | plt.plot(x, y_sin, "g-", linewidth=2.0, label="sin示例") 54 | 55 | # 设置图例位置,loc可以为[upper, lower, left, right, center] 56 | plt.legend(loc="upper left", prop=myfont, shadow=True) 57 | 58 | # 暂停 59 | plt.pause(0.1) 60 | 61 | # 关闭交互模式 62 | plt.ioff() 63 | 64 | # 图形显示 65 | plt.show() 66 | return 67 | # simple_plot() 68 | 69 | 70 | def scatter_plot(): 71 | """ 72 | scatter plot 73 | """ 74 | # 打开交互模式 75 | plt.ion() 76 | 77 | # 循环 78 | for index in range(50): 79 | # 清除原有图像 80 | # plt.cla() 81 | 82 | # 设定标题等 83 | plt.title("动态散点图", fontproperties=myfont) 84 | plt.grid(True) 85 | 86 | # 生成测试数据 87 | point_count = 5 88 | x_index = np.random.random(point_count) 89 | y_index = np.random.random(point_count) 90 | 91 | # 设置相关参数 92 | color_list = np.random.random(point_count) 93 | scale_list = np.random.random(point_count) * 100 94 | 95 | # 画散点图 96 | plt.scatter(x_index, y_index, s=scale_list, c=color_list, marker="o") 97 | 98 | # 暂停 99 | plt.pause(0.2) 100 | 101 | # 关闭交互模式 102 | plt.ioff() 103 | 104 | # 显示图形 105 | plt.show() 106 | return 107 | # scatter_plot() 108 | 109 | 110 | def three_dimension_scatter(): 111 | """ 112 | 3d scatter plot 113 | """ 114 | # 生成画布 115 | fig = plt.figure() 116 | 117 | # 打开交互模式 118 | plt.ion() 119 | 120 | # 循环 121 | for index in range(50): 122 | # 清除原有图像 123 | fig.clf() 124 | 125 | # 设定标题等 126 | fig.suptitle("三维动态散点图", fontproperties=myfont) 127 | 128 | # 生成测试数据 129 | point_count = 100 130 | x = np.random.random(point_count) 131 | y = np.random.random(point_count) 132 | z = np.random.random(point_count) 133 | color = np.random.random(point_count) 134 | scale = 
def update_my_infos():
    """
    Refresh the contact and chatroom lookups stored on ``my``.

    ``my.friends`` and ``my.groups`` are rebuilt as dictionaries keyed by each
    entry's "UserName", from ``my.get_friends(update=True)`` and
    ``my.get_chatrooms(update=True)`` respectively.
    """
    # contacts: {UserName: entry}
    contacts = my.get_friends(update=True)
    my.friends = {contact["UserName"]: contact for contact in contacts}

    # chatrooms: {UserName: entry}
    chatrooms = my.get_chatrooms(update=True)
    my.groups = {room["UserName"]: room for room in chatrooms}
def process_message_group(msg):
    """
    Forward noteworthy group-chat events to ``my.to_user_name``.

    Three cases are reported: a "Note" announcing a red packet, a "Text"
    containing one of ``my.global_keys`` (only the first matching keyword is
    reported), and a "Text" in which ``msg.is_at`` is set. For the two text
    cases the original message text is forwarded after the notice.
    """
    kind = msg.we_type

    # ==== red packet notice ====
    if kind == "Note" and msg.we_text.find("收到红包,请在手机上查看") >= 0:
        my.send("【%s】中有人发红包啦,快抢!" % msg.wind_name, toUserName=my.to_user_name)

    # the remaining checks only apply to text messages
    if kind != "Text":
        return

    # ==== keyword notice: first keyword found in the text, if any ====
    hit = next((word for word in my.global_keys if msg.we_text.find(word) >= 0), None)
    if hit is not None:
        notice = "【%s】中【%s】提及了关键字:%s" % (msg.wind_name, msg.actual_remark_name, hit)
        my.send(notice, toUserName=my.to_user_name)
        my.send(msg.we_text, toUserName=my.to_user_name)

    # ==== @-mention notice ====
    if msg.is_at:
        notice = "【%s】中【%s】@了你" % (msg.wind_name, msg.actual_remark_name)
        my.send(notice, toUserName=my.to_user_name)
        my.send(msg.we_text, toUserName=my.to_user_name)
@my.msg_register([TEXT, PICTURE, RECORDING, VIDEO, MAP, CARD, NOTE, SHARING, ATTACHMENT], isFriendChat=True, isGroupChat=True)
def text_reply(msg):
    """
    Entry point for every registered friend-chat and group-chat message.

    Messages sent by the logged-in account itself are skipped. The raw message
    is wrapped in ``Message``; types other than Text, Picture, Recording, Card,
    Note and Sharing are logged and dropped. Group messages (sender id starting
    with "@@") go through ``process_message_group``, and every accepted message
    is then handed to ``process_message_revoke``.

    :param msg: raw message dictionary delivered by itchat
    """
    # skip messages sent from this account
    if msg["FromUserName"] == my.loginInfo["User"]["UserName"]:
        return

    # extract the fields used by the handlers
    message = Message(msg)

    # only text, picture, recording, card, note and sharing messages are monitored
    if message.we_type not in ("Text", "Picture", "Recording", "Card", "Note", "Sharing"):
        # the log prefix names this function (it used to name process_message_group)
        logging.warning("text_reply: message type %s isn't included, ignored", message.we_type)
        return

    # group messages
    if message.from_user_name.startswith("@@"):
        process_message_group(message)

    # revoke handling: stores the message and reacts to revoke notes
    process_message_revoke(message)
'@f3b7fdc54717ea8dc22cb3edef59688e82ef34874e3236801537b94f6cd73e1e', 202 | 'ToUserName': '@e79dde912b8f817514c01f399ca9ba12', 203 | 'MsgType': 1, 204 | 'Content': '[微笑]己改', 205 | 'Status': 3, 206 | 'ImgStatus': 1, 207 | 'CreateTime': 1498448860, 208 | 'VoiceLength': 0, 209 | 'PlayLength': 0, 210 | 'FileName': '', 211 | 'FileSize': '', 212 | 'MediaId': '', 213 | 'Url': '', 214 | 'AppMsgType': 0, 215 | 'StatusNotifyCode': 0, 216 | 'StatusNotifyUserName': '', 217 | 'HasProductId': 0, 218 | 'Ticket': '', 219 | 'ImgHeight': 0, 220 | 'ImgWidth': 0, 221 | 'SubMsgType': 0, 222 | 'NewMsgId': 5254859004542036569, 223 | 'OriContent': '', 224 | 'User': <User: { 225 | 'MemberList': <ContactList: []>, 226 | 'Uin': 0, 227 | 'UserName': '@f3b7fdc54717ea8dc22cb3edef59688e82ef34874e3236801537b94f6cd73e1e', 228 | 'NickName': '付贵吉祥', 229 | 'HeadImgUrl': '/cgi-bin/mmwebwx-bin/webwxgeticon?seq=688475226&username=@f3b7fdc54717ea8dc22cb3edef59688e82ef34874e3236801537b94f6cd73e1e&skey=@', 230 | 'ContactFlag': 3, 231 | 'MemberCount': 0, 232 | 'RemarkName': '付贵吉祥@中建5号楼', 233 | 'HideInputBarFlag': 0, 234 | 'Sex': 1, 235 | 'Signature': '漫漫人生路...', 236 | 'VerifyFlag': 0, 237 | 'OwnerUin': 0, 238 | 'PYInitial': 'FGJX', 239 | 'PYQuanPin': 'fuguijixiang', 240 | 'RemarkPYInitial': 'FGJXZJ5HL', 241 | 'RemarkPYQuanPin': 'fuguijixiangzhongjian5haolou', 242 | 'StarFriend': 0, 243 | 'AppAccountFlag': 0, 244 | 'Statues': 0, 245 | 'AttrStatus': 135205, 246 | 'Province': '山东', 247 | 'City': '', 248 | 'Alias': '', 249 | 'SnsFlag': 17, 250 | 'UniFriend': 0, 251 | 'DisplayName': '', 252 | 'ChatRoomId': 0, 253 | 'KeyWord': '', 254 | 'EncryChatRoomId': '', 255 | 'IsOwner': 0 256 | }>, 257 | 'Type': 'Text', 258 | 'Text': '[微笑]己改' 259 | } 260 | """ 261 | 262 | """ 263 | 群消息: 264 | { 265 | 'MsgId': '7844877618948840992', 266 | 'FromUserName': '@@8dc5df044444d1fb8e3972e755b47adf9d07f5a032cae90a4d822b74ee1e4880', 267 | 'ToUserName': '@e79dde912b8f817514c01f399ca9ba12', 268 | 'MsgType': 1, 269 | 'Content': 
'就是那个,那个协议我们手上有吗', 270 | 'Status': 3, 271 | 'ImgStatus': 1, 272 | 'CreateTime': 1498448972, 273 | 'VoiceLength': 0, 274 | 'PlayLength': 0, 275 | 'FileName': '', 276 | 'FileSize': '', 277 | 'MediaId': '', 278 | 'Url': '', 279 | 'AppMsgType': 0, 280 | 'StatusNotifyCode': 0, 281 | 'StatusNotifyUserName': '', 282 | 'HasProductId': 0, 283 | 'Ticket': '', 284 | 'ImgHeight': 0, 285 | 'ImgWidth': 0, 286 | 'SubMsgType': 0, 287 | 'NewMsgId': 7844877618948840992, 288 | 'OriContent': '', 289 | 'ActualNickName': '5-1-1003', 290 | 'IsAt': False, 291 | 'ActualUserName': '@a0922f18795e4c3b6d7d09c492ace233', 292 | 'User': <Chatroom: { 293 | 'MemberList': <ContactList: [ 294 | <ChatroomMember: { 295 | 'MemberList': <ContactList: []>, 296 | 'Uin': 0, 297 | 'UserName': '@e79dde912b8f817514c01f399ca9ba12', 298 | 'NickName': '齐现虎', 299 | 'AttrStatus': 2147600869, 300 | 'PYInitial': '', 301 | 'PYQuanPin': '', 302 | 'RemarkPYInitial': '', 303 | 'RemarkPYQuanPin': '', 304 | 'MemberStatus': 0, 305 | 'DisplayName': '5-1-1601', 306 | 'KeyWord': 'qix' 307 | }>, 308 | <ChatroomMember: { 309 | 'MemberList': <ContactList: []>, 310 | 'Uin': 0, 311 | 'UserName': '@a9620e3d4b82eab2521ccdbb985afc37', 312 | 'NickName': 'A高佳祥15069179911', 313 | 'AttrStatus': 102503, 314 | 'PYInitial': '', 315 | 'PYQuanPin': '', 316 | 'RemarkPYInitial': '', 317 | 'RemarkPYQuanPin': '', 318 | 'MemberStatus': 0, 319 | 'DisplayName': '5-2-220315069179911', 320 | 'KeyWord': 'gao' 321 | }>, 322 | ....... 
323 | ]>, 324 | 'Uin': 0, 325 | 'UserName': '@@8dc5df044444d1fb8e3972e755b47adf9d07f5a032cae90a4d822b74ee1e4880', 326 | 'NickName': '中建锦绣澜庭二期5#楼', 327 | 'HeadImgUrl': '/cgi-bin/mmwebwx-bin/webwxgetheadimg?seq=0&username=@@8dc5df044444d1fb8e3972e755b47adf9d07f5a032cae90a4d822b74ee1e4880&skey=@', 328 | 'ContactFlag': 3, 329 | 'MemberCount': 106, 330 | 'RemarkName': '', 331 | 'HideInputBarFlag': 0, 332 | 'Sex': 0, 333 | 'Signature': '', 334 | 'VerifyFlag': 0, 335 | 'OwnerUin': 0, 336 | 'PYInitial': 'ZJJXLTEJ5L', 337 | 'PYQuanPin': 'zhongjianjinxiulantingerji5lou', 338 | 'RemarkPYInitial': '', 339 | 'RemarkPYQuanPin': '', 340 | 'StarFriend': 0, 341 | 'AppAccountFlag': 0, 342 | 'Statues': 0, 343 | 'AttrStatus': 0, 344 | 'Province': '', 345 | 'City': '', 346 | 'Alias': '', 347 | 'SnsFlag': 0, 348 | 'UniFriend': 0, 349 | 'DisplayName': '', 350 | 'ChatRoomId': 0, 351 | 'KeyWord': '', 352 | 'EncryChatRoomId': '@d1e510bc8cbd192468e9c85c6f5a9d81', 353 | 'IsOwner': 1, 354 | 'IsAdmin': None, 355 | 'Self': <ChatroomMember: { 356 | 'MemberList': <ContactList: []>, 357 | 'Uin': 0, 358 | 'UserName': '@e79dde912b8f817514c01f399ca9ba12', 359 | 'NickName': '齐现虎', 360 | 'AttrStatus': 2147600869, 361 | 'PYInitial': '', 362 | 'PYQuanPin': '', 363 | 'RemarkPYInitial': '', 364 | 'RemarkPYQuanPin': '', 365 | 'MemberStatus': 0, 366 | 'DisplayName': '5-1-1601', 367 | 'KeyWord': 'qix' 368 | }>, 369 | 'HeadImgUpdateFlag': 1, 370 | 'ContactType': 0, 371 | 'ChatRoomOwner': '@e79dde912b8f817514c01f399ca9ba12' 372 | }>, 373 | 'Type': 'Text', 374 | 'Text': '就是那个,那个协议我们手上有吗' 375 | } 376 | 377 | 警示消息:好友类 378 | { 379 | 'MsgId': '1529895072288746571', 380 | 'FromUserName': '@4076708be2e09ef83f249f168553d0dd55b4f734aee7d276e92ddbe98625476a', 381 | 'ToUserName': '@f97583d8ffbaee6189854116897c677f', 382 | 'MsgType': 10000, 383 | 'Content': '你已添加了呼啸而过的小青春,现在可以开始聊天了。', 384 | 'Status': 4, 385 | 'ImgStatus': 1, 386 | 'CreateTime': 1498533407, 387 | 'VoiceLength': 0, 388 | 'PlayLength': 0, 389 | 'FileName': '', 
390 | 'FileSize': '', 391 | 'MediaId': '', 392 | 'Url': '', 393 | 'AppMsgType': 0, 394 | 'StatusNotifyCode': 0, 395 | 'StatusNotifyUserName': '', 396 | 'HasProductId': 0, 397 | 'Ticket': '', 398 | 'ImgHeight': 0, 399 | 'ImgWidth': 0, 400 | 'SubMsgType': 0, 401 | 'NewMsgId': 1529895072288746571, 402 | 'OriContent': '', 403 | 'User': <User: { 404 | 'userName': '@4076708be2e09ef83f249f168553d0dd55b4f734aee7d276e92ddbe98625476a', 405 | 'MemberList': <ContactList: []> 406 | }>, 407 | 'Type': 'Note', 408 | 'Text': '你已添加了呼啸而过的小青春,现在可以开始聊天了。' 409 | } 410 | 411 | 警示消息:群类 412 | { 413 | 'MsgId': '1049646282086057263', 414 | 'FromUserName': '@@300f57b68ca7ef593ae3221eef7dba5377466c86122aaa15a8ffc1031310e210', 415 | 'ToUserName': '@006f63e8086ab07fcbe3771dc824c4a6', 416 | 'MsgType': 10000, 417 | 'Content': '你邀请"大姐"加入了群聊', 418 | 'Status': 3, 419 | 'ImgStatus': 1, 420 | 'CreateTime': 1498533901, 421 | 'VoiceLength': 0, 422 | 'PlayLength': 0, 423 | 'FileName': '', 424 | 'FileSize': '', 425 | 'MediaId': '', 426 | 'Url': '', 427 | 'AppMsgType': 0, 428 | 'StatusNotifyCode': 0, 429 | 'StatusNotifyUserName': '', 430 | 'HasProductId': 0, 431 | 'Ticket': '', 432 | 'ImgHeight': 0, 433 | 'ImgWidth': 0, 434 | 'SubMsgType': 0, 435 | 'NewMsgId': 1049646282086057263, 436 | 'OriContent': '', 437 | 'ActualUserName': '@006f63e8086ab07fcbe3771dc824c4a6', 438 | 'ActualNickName': '某某某', 439 | 'IsAt': False, 440 | 'User': <Chatroom: { 441 | 'UserName': '@@300f57b68ca7ef593ae3221eef7dba5377466c86122aaa15a8ffc1031310e210', 442 | 'MemberList': <ContactList: []> 443 | }>, 444 | 'Type': 'Note', 445 | 'Text': '你邀请"大姐"加入了群聊' 446 | } 447 | """ 448 | -------------------------------------------------------------------------------- /python_weibo.py: -------------------------------------------------------------------------------- 1 | # _*_ coding: utf-8 _*_ 2 | 3 | """ 4 | python_weibo.py by xianhu 5 | """ 6 | 7 | import re 8 | import rsa 9 | import time 10 | import json 11 | import base64 12 | import logging 13 
class WeiBoLogin(object):
    """
    Log in to weibo.com through the Sina SSO endpoints.

    After a successful ``login()`` the attributes ``user_uniqueid`` and
    ``user_nick`` hold the values returned by the server. ``session`` is the
    ``requests.Session`` used for every request of the exchange.
    """

    # keys of the prelogin response that login() cannot work without
    _PRELOGIN_KEYS = ("servertime", "nonce", "pubkey", "rsakv")

    def __init__(self):
        """
        Create the HTTP session and request the weibo.com login page once.
        """
        self.user_name = None
        self.pass_word = None
        self.user_uniqueid = None
        self.user_nick = None

        self.session = requests.Session()
        self.session.headers.update({"User-Agent": "Mozilla/5.0 (Windows NT 6.3; WOW64; rv:41.0) Gecko/20100101 Firefox/41.0"})
        self.session.get("http://weibo.com/login.php")

    @staticmethod
    def _parse_jsonp(text):
        """
        Return the JSON value wrapped in a JSONP call such as ``callback({...})``.

        :raises ValueError: if *text* has no parenthesised payload or the
            payload is not valid JSON
        """
        match = re.search(r"\((?P<payload>.*)\)", text)
        if match is None:
            # re.search returns None on no match; report it instead of failing on .group()
            raise ValueError("response is not a JSONP call: %r" % text[:100])
        return json.loads(match.group("payload"))

    def _ask_captcha(self, pcid):
        """
        Save the captcha image for *pcid* to captcha.jpeg and return the code typed by the user.
        """
        url = "http://login.sina.com.cn/cgi/pin.php?r=%d&s=0&p=%s" % (int(time.time()), pcid)
        with open("captcha.jpeg", "wb") as file_out:
            file_out.write(self.session.get(url).content)
        return input("请输入验证码:")

    def login(self, user_name, pass_word):
        """
        Log in to weibo.com.

        :param user_name: account name, stored on the instance
        :param pass_word: plain password, stored on the instance
        :return: True when both ``user_uniqueid`` and ``user_nick`` were
            obtained, otherwise False. Failed requests and unexpected response
            bodies during the SSO exchange are logged and reported as False.
        """
        self.user_name = user_name
        self.pass_word = pass_word
        self.user_uniqueid = None
        self.user_nick = None

        # prelogin: server time, nonce and RSA public key
        s_user_name = self.get_username()
        json_data = self.get_json_data(su_value=s_user_name)
        if not json_data:
            return False
        missing = [key for key in self._PRELOGIN_KEYS if key not in json_data]
        if missing:
            logging.warning("WeiBoLogin failed: prelogin response lacks %s: %s", missing, json_data)
            return False
        s_pass_word = self.get_password(json_data["servertime"], json_data["nonce"], json_data["pubkey"])

        # form fields of the SSO login request
        post_data = {
            "entry": "weibo",
            "gateway": "1",
            "from": "",
            "savestate": "7",
            "userticket": "1",
            "vsnf": "1",
            "service": "miniblog",
            "encoding": "UTF-8",
            "pwencode": "rsa2",
            "sr": "1280*800",
            "prelt": "529",
            "url": "http://weibo.com/ajaxlogin.php?framelogin=1&callback=parent.sinaSSOController.feedBackUrlCallBack",
            "rsakv": json_data["rsakv"],
            "servertime": json_data["servertime"],
            "nonce": json_data["nonce"],
            "su": s_user_name,
            "sp": s_pass_word,
            "returntype": "TEXT",
        }

        # Broad catch at the network boundary, matching get_json_data: any failed
        # request or malformed body means "not logged in", not a crash.
        # NOTE(review): the login endpoints below use plain http, as before.
        try:
            # captcha, only when the prelogin response asks for one
            if json_data.get("showpin") == 1:
                post_data["door"] = self._ask_captcha(json_data["pcid"])
                post_data["pcid"] = json_data["pcid"]

            # step 1: obtain a ticket from the SSO endpoint
            login_url_1 = "http://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.18)&_=%d" % int(time.time())
            json_data_1 = self.session.post(login_url_1, data=post_data).json()
            if json_data_1["retcode"] != "0":
                logging.warning("WeiBoLogin failed: %s", json_data_1)
                return False

            # step 2: exchange the ticket at passport.weibo.com
            params = {
                "callback": "sinaSSOController.callbackLoginStatus",
                "client": "ssologin.js(v1.4.18)",
                "ticket": json_data_1["ticket"],
                "ssosavestate": int(time.time()),
                "_": int(time.time()*1000),
            }
            response = self.session.get("https://passport.weibo.com/wbsso/login", params=params)
            json_data_2 = self._parse_jsonp(response.text)
            if json_data_2["result"] is True:
                # read both values before storing either, so a missing key leaves no half-set state
                userinfo = json_data_2["userinfo"]
                uniqueid, nick = userinfo["uniqueid"], userinfo["displayname"]
                self.user_uniqueid, self.user_nick = uniqueid, nick
                logging.warning("WeiBoLogin succeed: %s", json_data_2)
            else:
                logging.warning("WeiBoLogin failed: %s", json_data_2)
        except Exception as excep:
            logging.error("WeiBoLogin login error: %s", excep)
        return bool(self.user_uniqueid and self.user_nick)

    def get_username(self):
        """
        Return ``self.user_name`` URL-quoted and then base64-encoded, as a str.
        """
        username_quote = urllib.parse.quote_plus(self.user_name)
        username_base64 = base64.b64encode(username_quote.encode("utf-8"))
        return username_base64.decode("utf-8")

    def get_json_data(self, su_value):
        """
        Fetch the prelogin data ("servertime", "nonce", "pubkey", "rsakv", "showpin", etc).

        :param su_value: encoded user name as returned by get_username()
        :return: the decoded JSON object, or an empty dict when the request or
            the parsing fails (the error is logged)
        """
        params = {
            "entry": "weibo",
            "callback": "sinaSSOController.preloginCallBack",
            "rsakt": "mod",
            "checkpin": "1",
            "client": "ssologin.js(v1.4.18)",
            "su": su_value,
            "_": int(time.time()*1000),
        }
        try:
            response = self.session.get("http://login.sina.com.cn/sso/prelogin.php", params=params)
            json_data = self._parse_jsonp(response.text)
        except Exception as excep:
            json_data = {}
            logging.error("WeiBoLogin get_json_data error: %s", excep)

        logging.debug("WeiBoLogin get_json_data: %s", json_data)
        return json_data

    def get_password(self, servertime, nonce, pubkey):
        """
        Return the RSA-encrypted password as a hexadecimal string.

        The plaintext is "<servertime>\\t<nonce>\\n<password>"; the key is built
        from *pubkey* (hexadecimal modulus) and the exponent 0x10001.
        """
        string = (str(servertime) + "\t" + str(nonce) + "\n" + str(self.pass_word)).encode("utf-8")
        public_key = rsa.PublicKey(int(pubkey, 16), int("10001", 16))
        password = rsa.encrypt(string, public_key)
        password = binascii.b2a_hex(password)
        return password.decode()